/ /
suppressMessages(library(ggplot2))
suppressMessages(library(ape))
suppressMessages(library(limma))
suppressMessages(library(edgeR))
suppressMessages(library(statmod))
suppressMessages(library(stats))
suppressMessages(library(dplyr))
suppressMessages(library(stringr))
suppressMessages(library(tibble))
suppressMessages(library(ggforce))
suppressMessages(library(WGCNA))
suppressMessages(library(km2gcn))
suppressMessages(library(CoExpNets))
suppressMessages(library(devtools))
suppressMessages(library(UpSetR))
suppressMessages(library(RCy3))
suppressMessages(library(readr))
suppressMessages(library(scales))
suppressMessages(library(data.table))
suppressMessages(library(forcats))
suppressMessages(library(kableExtra))
setwd("/Users/jessicamitchell/Library/CloudStorage/OneDrive-HarvardUniversity/OD_documents/Cfixpaper")
rsem_gene_data <- read.table("epersephone_rsem_gene_count_proteinCODINGonly_2022.08.14.matrix")
s2c<-read.csv("rsem_gene_info.csv",header = TRUE, stringsAsFactors=FALSE)
s2c$sample
## [1] "sample103" "sample106" "sample112" "sample115" "sample119"
## [6] "sample121" "sample127" "sample1294" "sample1300" "sample1310"
## [11] "sample1324" "sample1330" "sample1346" "sample1347" "sample1353"
## [16] "sample1369" "sample136" "sample16" "sample1750" "sample1871"
## [21] "sample1876" "sample28" "sample36" "sample54" "sample56"
## [26] "sample58" "sample69" "sample72" "sample78" "sample94"
rnaseqMatrix=round(rsem_gene_data)
filter=rowSums(cpm(rnaseqMatrix)>=1)>=3
summary(filter)
## Mode FALSE TRUE
## logical 28 3140
cpm_filtered_matrix=rnaseqMatrix[filter,]
#cpm_filtered_matrix
DGE<-DGEList(cpm_filtered_matrix)
DGE<-calcNormFactors(DGE,method =c("TMM"))
#DGE
treatvals<-s2c$condition
design_condition=model.matrix(~condition, data=s2c)
RNAseq_voom <- voomWithQualityWeights(DGE, design=design_condition,normalize.method="none")$E
###this filters out the genes that have less variation in expression
WGCNA_matrix = t(RNAseq_voom[order(apply(RNAseq_voom,1,mad), decreasing = T)[1:2500],])
WGCNA_matrix <- as.data.frame(WGCNA_matrix)
row.names(WGCNA_matrix) = s2c$sample
names(s2c)
## [1] "sample" "condition" "reps" "size" "size_cat"
## [6] "troph_color" "X" "X.1" "X.2" "X.3"
## [11] "X.4"
###remove the blank columns
s2c <- s2c[,1:6]
sample_rownames <- rownames(WGCNA_matrix)
s2c2 <- s2c[, -1]
rownames(s2c2) <- sample_rownames
#removing rep and size categories
s2c3 <- s2c2[,c(1,4:5)]
names(s2c3)
## [1] "condition" "size_cat" "troph_color"
####making this binary data for downstream analyses
s2c4 <- s2c3 %>%
mutate(Sulfide = case_when(
startsWith(condition, "HS") ~ "1",
startsWith(condition, "LS") ~ "0",
startsWith(condition, "SW") ~ "0",
startsWith(condition, "H2")~ "0"
))
s2c5 <- s2c4 %>%
mutate(Oxygen = case_when(
endsWith(condition, "HO") ~ "1",
endsWith(condition, "LO") ~ "0"
))
s2c6 <- s2c5 %>%
mutate(Nitrogen = case_when(
grepl("noN",condition) ~ "0",
grepl("_N",condition) ~ "1")
)
s2c7 <- s2c6 %>%
mutate(Hydrogen = case_when(
startsWith(condition, "HS") ~ "0",
startsWith(condition, "LS") ~ "0",
startsWith(condition, "SW") ~ "0",
startsWith(condition, "H2")~ "1"
))
s2c20 <- s2c7 %>%
mutate(Low_to_No_sulfide = case_when(
startsWith(condition, "HS") ~ "0",
startsWith(condition, "LS") ~ "1",
startsWith(condition, "SW") ~ "1",
startsWith(condition, "H2")~ "0"
))
s2c8 <- s2c20[,4:8]
s2c8$Sulfide <- as.numeric(unlist(s2c8$Sulfide))
s2c8$Oxygen <- as.numeric(unlist(s2c8$Oxygen))
s2c8$Nitrogen <- as.numeric(unlist(s2c8$Nitrogen))
s2c8$Hydrogen <- as.numeric(unlist(s2c8$Hydrogen))
s2c8$Low_to_No_sulfide <- as.numeric(unlist(s2c8$Low_to_No_sulfide))
str(s2c8)
## 'data.frame': 30 obs. of 5 variables:
## $ Sulfide : num 0 0 1 1 1 1 1 1 1 1 ...
## $ Oxygen : num 1 1 0 0 0 1 1 1 1 1 ...
## $ Nitrogen : num 1 1 1 1 1 1 1 0 0 0 ...
## $ Hydrogen : num 1 1 0 0 0 0 0 0 0 0 ...
## $ Low_to_No_sulfide: num 0 0 0 0 0 0 0 0 0 0 ...
s2c9 <- s2c3 %>%
mutate(H2_N_HO = case_when(
condition== "H2_N_HO" ~ "1",
TRUE ~ "0"))
s210 <- s2c9 %>%
mutate(HS_N_LO = case_when(
condition== "HS_N_LO" ~ "1",
TRUE ~ "0"))
s211 <- s210 %>%
mutate(HS_N_HO = case_when(
condition== "HS_N_HO" ~ "1",
TRUE ~ "0"))
s212 <- s211 %>%
mutate(HS_noN_HO = case_when(
condition== "HS_noN_HO" ~ "1",
TRUE ~ "0"))
s213 <- s212 %>%
mutate(LS_noN_HO = case_when(
condition== "LS_noN_HO" ~ "1",
TRUE ~ "0"))
s214 <- s213 %>%
mutate(SW_noN_HO = case_when(
condition== "SW_noN_HO" ~ "1",
TRUE ~ "0"))
s215 <- s214 %>%
mutate(LS_N_HO = case_when(
condition== "LS_N_HO" ~ "1",
TRUE ~ "0"))
s216 <- s215 %>%
mutate(H2_noN_HO = case_when(
condition== "H2_noN_HO" ~ "1",
TRUE ~ "0"))
s217 <- s216 %>%
mutate(H2_N_LO = case_when(
condition== "H2_N_LO" ~ "1",
TRUE ~ "0"))
s218 <- s217 %>%
mutate(H2_noN_LO = case_when(
condition== "H2_noN_LO" ~ "1",
TRUE ~ "0"))
names(s218)
## [1] "condition" "size_cat" "troph_color" "H2_N_HO" "HS_N_LO"
## [6] "HS_N_HO" "HS_noN_HO" "LS_noN_HO" "SW_noN_HO" "LS_N_HO"
## [11] "H2_noN_HO" "H2_N_LO" "H2_noN_LO"
s218$H2_N_HO <- as.numeric(unlist(s218$H2_N_HO))
s218$HS_N_LO <- as.numeric(unlist(s218$HS_N_LO))
s218$HS_N_HO <- as.numeric(unlist(s218$HS_N_HO))
s218$HS_noN_HO <- as.numeric(unlist(s218$HS_noN_HO))
s218$LS_noN_HO <- as.numeric(unlist(s218$LS_noN_HO))
s218$SW_noN_HO <- as.numeric(unlist(s218$SW_noN_HO))
s218$LS_N_HO <- as.numeric(unlist(s218$LS_N_HO))
s218$H2_noN_HO <- as.numeric(unlist(s218$H2_noN_HO))
s218$H2_N_LO <- as.numeric(unlist(s218$H2_N_LO))
s218$H2_noN_LO <- as.numeric(unlist(s218$H2_noN_LO))
s218 <- s218[,4:13]
biconditions <- cbind(s218,s2c8)
save(WGCNA_matrix, biconditions, file = "count_matrix_condition_info_dataInput.RData")
/
/
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
sft2 = pickSoftThreshold(WGCNA_matrix, powerVector = powers, networkType= "signed hybrid", verbose = 2)
## pickSoftThreshold: will use block size 2500.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 2500 of 2500
## Warning: executing %dopar% sequentially: no parallel backend registered
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.0489 -0.388 0.858 331.000 308.0000 639.00
## 2 2 0.4660 -1.170 0.913 124.000 112.0000 343.00
## 3 3 0.6690 -1.540 0.944 57.600 48.3000 211.00
## 4 4 0.7060 -1.770 0.933 30.400 23.5000 140.00
## 5 5 0.7310 -1.910 0.942 17.600 12.5000 96.90
## 6 6 0.7640 -1.900 0.956 10.900 7.0200 69.60
## 7 7 0.7900 -1.900 0.968 7.060 4.0600 51.30
## 8 8 0.8070 -1.890 0.970 4.800 2.4400 38.60
## 9 9 0.8150 -1.850 0.974 3.370 1.5300 29.50
## 10 10 0.8220 -1.830 0.972 2.440 0.9920 22.90
## 11 12 0.8720 -1.640 0.985 1.380 0.4430 14.30
## 12 14 0.8920 -1.490 0.975 0.839 0.2160 9.26
## 13 16 0.9150 -1.430 0.968 0.544 0.1110 6.68
## 14 18 0.9660 -1.380 0.991 0.371 0.0579 5.22
## 15 20 0.9190 -1.490 0.949 0.264 0.0317 4.66
# Plot the results:
par(mar = c(1,2,1,2));
cex1 = 0.8;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.8,col="red")
# Mean connectivity as a function of the soft-thresholding power
par(mar = c(1,2,1,2))
plot(sft2$fitIndices[,1], sft2$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft2$fitIndices[,1], sft2$fitIndices[,5], labels=powers, cex=cex1,col="red")
sft2$fitIndices
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k.
## 1 1 0.04890048 -0.3884992 0.8581139 331.4011205 308.13070231
## 2 2 0.46592537 -1.1680241 0.9132379 124.4904470 112.01173850
## 3 3 0.66927892 -1.5390034 0.9440341 57.6399451 48.32562217
## 4 4 0.70560384 -1.7734087 0.9325776 30.4063264 23.49079218
## 5 5 0.73115622 -1.9082518 0.9420155 17.5623528 12.50091882
## 6 6 0.76403365 -1.9033807 0.9561525 10.8503969 7.02207374
## 7 7 0.79011799 -1.8998930 0.9678497 7.0639452 4.05989014
## 8 8 0.80724037 -1.8863799 0.9695552 4.7967210 2.44030738
## 9 9 0.81539375 -1.8468855 0.9740395 3.3725516 1.52789332
## 10 10 0.82151935 -1.8250238 0.9724534 2.4419234 0.99168615
## 11 12 0.87162088 -1.6441349 0.9852418 1.3763842 0.44250659
## 12 14 0.89171300 -1.4932610 0.9749399 0.8387154 0.21564804
## 13 16 0.91539066 -1.4287488 0.9678064 0.5438888 0.11080318
## 14 18 0.96562166 -1.3772418 0.9913790 0.3711772 0.05787538
## 15 20 0.91949827 -1.4881952 0.9494624 0.2643675 0.03166435
## max.k.
## 1 638.846689
## 2 342.917019
## 3 210.869059
## 4 139.536080
## 5 96.870153
## 6 69.586561
## 7 51.295838
## 8 38.594403
## 9 29.529360
## 10 22.915384
## 11 14.292054
## 12 9.260525
## 13 6.684230
## 14 5.219041
## 15 4.660383
####choosing softpower to be 8
softPower = 8
ADJ1=abs(cor(WGCNA_matrix,use="p"))^8
k=as.vector(apply(ADJ1,2,sum, na.rm=T))
sizeGrWindow(10,5)
par(mfrow=c(1,2))
hist(k)
scaleFreePlot(k, main="Check scale free topology\n")
## scaleFreeRsquared slope
## 1 0.85 -1.93
/
/
set.seed(123)
softPower = 8;
adjacency = adjacency(WGCNA_matrix, power = softPower, type="signed hybrid")
dissTOM = TOMdist(adjacency, TOMType = "signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average");
# Plot the resulting clustering tree (dendrogram)
par(mar = c(3,3,3,3))
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
labels = FALSE, hang = 0.04);
minModuleSize = 30;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
## ..cutHeight not given, setting it to 0.998 ===> 99% of the (truncated) height range in dendro.
## ..done.
table(dynamicMods)
## dynamicMods
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 242 233 214 160 155 143 131 124 124 120 109 107 106 100 99 83 79 72 53 46
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
## dynamicColors
## black blue brown cyan green greenyellow
## 124 214 160 99 143 107
## grey grey60 lightcyan lightgreen lightyellow magenta
## 242 72 79 53 46 120
## midnightblue pink purple red salmon tan
## 83 124 109 131 100 106
## turquoise yellow
## 233 155
####changing names of colors
colorOrder = c("grey", standardColors(50))
list(colorOrder)
## [[1]]
## [1] "grey" "turquoise" "blue" "brown"
## [5] "yellow" "green" "red" "black"
## [9] "pink" "magenta" "purple" "greenyellow"
## [13] "tan" "salmon" "cyan" "midnightblue"
## [17] "lightcyan" "grey60" "lightgreen" "lightyellow"
## [21] "royalblue" "darkred" "darkgreen" "darkturquoise"
## [25] "darkgrey" "orange" "darkorange" "white"
## [29] "skyblue" "saddlebrown" "steelblue" "paleturquoise"
## [33] "violet" "darkolivegreen" "darkmagenta" "sienna3"
## [37] "yellowgreen" "skyblue3" "plum1" "orangered4"
## [41] "mediumpurple3" "lightsteelblue1" "lightcyan1" "ivory"
## [45] "floralwhite" "darkorange2" "brown4" "bisque4"
## [49] "darkslateblue" "plum2" "thistle2"
moduleLabels = match(dynamicColors, colorOrder)-1
head(moduleLabels)
## [1] 10 13 15 17 17 17
table(moduleLabels)
## moduleLabels
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 242 233 214 160 155 143 131 124 124 120 109 107 106 100 99 83 79 72 53 46
table(dynamicColors)
## dynamicColors
## black blue brown cyan green greenyellow
## 124 214 160 99 143 107
## grey grey60 lightcyan lightgreen lightyellow magenta
## 242 72 79 53 46 120
## midnightblue pink purple red salmon tan
## 83 124 109 131 100 106
## turquoise yellow
## 233 155
customColorOrder = c("grey", "brown", "deeppink3", "darkcyan","plum","darkgreen","red","black","pink","skyblue","plum4","greenyellow","tan","darkorchid4","darkgoldenrod1","midnightblue","lightcyan","grey60","lightgreen","yellow")
moduleColors.custom = customColorOrder[moduleLabels + 1]
table(dynamicColors, moduleColors.custom)##sanity check
## moduleColors.custom
## dynamicColors black brown darkcyan darkgoldenrod1 darkgreen darkorchid4
## black 124 0 0 0 0 0
## blue 0 0 0 0 0 0
## brown 0 0 160 0 0 0
## cyan 0 0 0 99 0 0
## green 0 0 0 0 143 0
## greenyellow 0 0 0 0 0 0
## grey 0 0 0 0 0 0
## grey60 0 0 0 0 0 0
## lightcyan 0 0 0 0 0 0
## lightgreen 0 0 0 0 0 0
## lightyellow 0 0 0 0 0 0
## magenta 0 0 0 0 0 0
## midnightblue 0 0 0 0 0 0
## pink 0 0 0 0 0 0
## purple 0 0 0 0 0 0
## red 0 0 0 0 0 0
## salmon 0 0 0 0 0 100
## tan 0 0 0 0 0 0
## turquoise 0 233 0 0 0 0
## yellow 0 0 0 0 0 0
## moduleColors.custom
## dynamicColors deeppink3 greenyellow grey grey60 lightcyan lightgreen
## black 0 0 0 0 0 0
## blue 214 0 0 0 0 0
## brown 0 0 0 0 0 0
## cyan 0 0 0 0 0 0
## green 0 0 0 0 0 0
## greenyellow 0 107 0 0 0 0
## grey 0 0 242 0 0 0
## grey60 0 0 0 72 0 0
## lightcyan 0 0 0 0 79 0
## lightgreen 0 0 0 0 0 53
## lightyellow 0 0 0 0 0 0
## magenta 0 0 0 0 0 0
## midnightblue 0 0 0 0 0 0
## pink 0 0 0 0 0 0
## purple 0 0 0 0 0 0
## red 0 0 0 0 0 0
## salmon 0 0 0 0 0 0
## tan 0 0 0 0 0 0
## turquoise 0 0 0 0 0 0
## yellow 0 0 0 0 0 0
## moduleColors.custom
## dynamicColors midnightblue pink plum plum4 red skyblue tan yellow
## black 0 0 0 0 0 0 0 0
## blue 0 0 0 0 0 0 0 0
## brown 0 0 0 0 0 0 0 0
## cyan 0 0 0 0 0 0 0 0
## green 0 0 0 0 0 0 0 0
## greenyellow 0 0 0 0 0 0 0 0
## grey 0 0 0 0 0 0 0 0
## grey60 0 0 0 0 0 0 0 0
## lightcyan 0 0 0 0 0 0 0 0
## lightgreen 0 0 0 0 0 0 0 0
## lightyellow 0 0 0 0 0 0 0 46
## magenta 0 0 0 0 0 120 0 0
## midnightblue 83 0 0 0 0 0 0 0
## pink 0 124 0 0 0 0 0 0
## purple 0 0 0 109 0 0 0 0
## red 0 0 0 0 131 0 0 0
## salmon 0 0 0 0 0 0 0 0
## tan 0 0 0 0 0 0 106 0
## turquoise 0 0 0 0 0 0 0 0
## yellow 0 0 155 0 0 0 0 0
str(dynamicColors)
## chr [1:2500] "purple" "salmon" "midnightblue" "grey60" "grey60" "grey60" ...
str(moduleColors.custom)
## chr [1:2500] "plum4" "darkorchid4" "midnightblue" "grey60" "grey60" ...
plotDendroAndColors(geneTree, moduleColors.custom, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
# Calculate eigengenes
MEList1 = moduleEigengenes(WGCNA_matrix, colors = moduleColors.custom)
MEs1 = MEList1$eigengenes
#Calculate dissimilarity of module eigengenes
MEDiss1 = 1-cor(MEs1);
# Cluster module eigengenes
METree1 = hclust(as.dist(MEDiss1), method = "average")
# Plot the result
par(mar = c(3,3,3,3))
plot(METree1, main = "Clustering of module eigengenes",
xlab = "", sub = "")
MEDissThres = 0.3
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Call an automatic merging function
merge = mergeCloseModules(WGCNA_matrix, moduleColors.custom, cutHeight = MEDissThres, verbose = 3)
## mergeCloseModules: Merging modules whose distance is less than 0.3
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 20 module eigengenes in given set.
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 16 module eigengenes in given set.
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 15 module eigengenes in given set.
## Calculating new MEs...
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 15 module eigengenes in given set.
#Eigengenes of the new merged modules with new colors
mergedMEs = merge$newMEs;
mergedColors = merge$colors
table(moduleColors.custom)
## moduleColors.custom
## black brown darkcyan darkgoldenrod1 darkgreen
## 124 233 160 99 143
## darkorchid4 deeppink3 greenyellow grey grey60
## 100 214 107 242 72
## lightcyan lightgreen midnightblue pink plum
## 79 53 83 124 155
## plum4 red skyblue tan yellow
## 109 131 120 106 46
table(mergedColors)
## mergedColors
## black brown darkcyan darkgoldenrod1 darkgreen
## 124 233 239 99 143
## darkorchid4 deeppink3 greenyellow grey grey60
## 100 214 160 242 203
## midnightblue pink plum4 skyblue yellow
## 83 385 109 120 46
plotDendroAndColors(geneTree, cbind(moduleColors.custom, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)
modNames = substring(names(mergedMEs), 3)
MEs0 = moduleEigengenes(WGCNA_matrix, mergedColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, biconditions, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
save(MEs, mergedColors, geneTree,moduleTraitCor,moduleTraitPvalue,
file = "wgcna_manual_network20220910.RData")
lnames = load(file = "wgcna_manual_network20220910.RData")
lnames = load(file= "count_matrix_condition_info_dataInput.RData")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
par(mar = c(1,1,2,2))
par(mfrow=c(1,1))
sampleTree = hclust(dist(WGCNA_matrix), method = "average")
names(biconditions)
## [1] "H2_N_HO" "HS_N_LO" "HS_N_HO"
## [4] "HS_noN_HO" "LS_noN_HO" "SW_noN_HO"
## [7] "LS_N_HO" "H2_noN_HO" "H2_N_LO"
## [10] "H2_noN_LO" "Sulfide" "Oxygen"
## [13] "Nitrogen" "Hydrogen" "Low_to_No_sulfide"
biconditions <- biconditions[,1:14]
desired_order <- c("HS_N_HO" , "HS_noN_HO", "HS_N_LO","LS_N_HO","LS_noN_HO","SW_noN_HO","H2_N_HO","H2_noN_HO","H2_N_LO","H2_noN_LO","Sulfide","Oxygen","Nitrogen","Hydrogen")
biconditions <- biconditions[,match(desired_order,colnames(biconditions))]
traitColors = numbers2colors(biconditions, signed = TRUE);
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree, traitColors,
groupLabels = names(biconditions),
main = "Sample dendrogram and trait heatmap")
nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)
moduleColors = mergedColors
MEs0 = moduleEigengenes(WGCNA_matrix, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
##reordering MEs to match figure
mycolororder <- c("MEmidnightblue","MEdarkorchid4","MEyellow","MEgreenyellow", "MEblack", "MEdarkgreen","MEdeeppink3","MEpink","MEgrey60","MEbrown","MEplum4","MEskyblue","MEdarkcyan","MEdarkgoldenrod1","MEgrey")
MEs <- MEs[,match(mycolororder,colnames(MEs))]
biconditions <- biconditions[,match(desired_order,colnames(biconditions))]
moduleTraitCor = cor(MEs, biconditions, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
summary(moduleTraitPvalue)
## HS_N_HO HS_noN_HO HS_N_LO LS_N_HO
## Min. :0.01196 Min. :0.00000 Min. :0.003134 Min. :0.2678
## 1st Qu.:0.03387 1st Qu.:0.03521 1st Qu.:0.137401 1st Qu.:0.4198
## Median :0.14691 Median :0.27026 Median :0.344828 Median :0.5388
## Mean :0.26185 Mean :0.32608 Mean :0.396634 Mean :0.5700
## 3rd Qu.:0.32153 3rd Qu.:0.57388 3rd Qu.:0.671187 3rd Qu.:0.6861
## Max. :0.91450 Max. :0.93636 Max. :0.970864 Max. :0.9718
## LS_noN_HO SW_noN_HO H2_N_HO H2_noN_HO
## Min. :0.0000558 Min. :0.0008013 Min. :0.01812 Min. :0.03995
## 1st Qu.:0.1967968 1st Qu.:0.2808627 1st Qu.:0.29340 1st Qu.:0.13709
## Median :0.4090254 Median :0.3534417 Median :0.51191 Median :0.21959
## Mean :0.4217359 Mean :0.4078278 Mean :0.49212 Mean :0.33175
## 3rd Qu.:0.6354574 3rd Qu.:0.6356339 3rd Qu.:0.69145 3rd Qu.:0.50608
## Max. :0.8682022 Max. :0.7872686 Max. :0.98003 Max. :0.78614
## H2_N_LO H2_noN_LO Sulfide Oxygen
## Min. :0.001603 Min. :0.05381 Min. :0.001234 Min. :0.0000002
## 1st Qu.:0.060706 1st Qu.:0.10483 1st Qu.:0.067933 1st Qu.:0.0007034
## Median :0.145126 Median :0.18447 Median :0.174750 Median :0.0103513
## Mean :0.239831 Mean :0.27926 Mean :0.366110 Mean :0.0822130
## 3rd Qu.:0.435491 3rd Qu.:0.36867 3rd Qu.:0.667126 3rd Qu.:0.0620483
## Max. :0.716300 Max. :0.77636 Max. :0.970160 Max. :0.5030011
## Nitrogen Hydrogen
## Min. :0.002802 Min. :0.000174
## 1st Qu.:0.143535 1st Qu.:0.034898
## Median :0.314161 Median :0.068381
## Mean :0.334997 Mean :0.203924
## 3rd Qu.:0.555353 3rd Qu.:0.219406
## Max. :0.841863 Max. :0.900770
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(3,4, 1,1))
names(biconditions)
## [1] "HS_N_HO" "HS_noN_HO" "HS_N_LO" "LS_N_HO" "LS_noN_HO" "SW_noN_HO"
## [7] "H2_N_HO" "H2_noN_HO" "H2_N_LO" "H2_noN_LO" "Sulfide" "Oxygen"
## [13] "Nitrogen" "Hydrogen"
head(moduleTraitCor)
## HS_N_HO HS_noN_HO HS_N_LO LS_N_HO LS_noN_HO
## MEmidnightblue 0.02046914 0.77549274 -0.1012675 -0.006751255 0.24346567
## MEdarkorchid4 0.39363790 0.20790779 -0.3311569 0.208972393 0.24138458
## MEyellow 0.04160808 0.06616182 -0.5017755 0.093907529 0.06104898
## MEgreenyellow 0.22355044 0.08593010 -0.1231448 0.134486463 0.16362566
## MEblack 0.16895507 0.30463652 -0.5213307 0.137603215 0.15939483
## MEdarkgreen 0.29051730 0.14258634 -0.4439989 0.165647455 0.14956928
## SW_noN_HO H2_N_HO H2_noN_HO H2_N_LO H2_noN_LO
## MEmidnightblue 0.10228132 -0.326359663 -0.07370678 -0.3379315 -0.2956921
## MEdarkorchid4 0.06986956 -0.214684994 0.29526963 -0.5156230 -0.3555769
## MEyellow 0.18334438 0.064921651 0.15299999 0.1231695 -0.2853864
## MEgreenyellow 0.07837730 -0.428591372 0.23740423 -0.2124148 -0.1592232
## MEblack 0.17555561 -0.124569788 0.24385690 -0.3445732 -0.1995284
## MEdarkgreen -0.05142174 -0.004771732 0.23089975 -0.2973662 -0.1816616
## Sulfide Oxygen Nitrogen Hydrogen
## MEmidnightblue 0.454784198 0.4810992 -0.4511045 -0.63300331
## MEdarkorchid4 0.177011014 0.7871273 -0.2753128 -0.48415103
## MEyellow -0.257937228 0.4346851 -0.1069013 0.03411204
## MEgreenyellow 0.121985353 0.3239114 -0.2436685 -0.34465859
## MEblack -0.031252607 0.6974892 -0.4103493 -0.26014469
## MEdarkgreen -0.007132597 0.6042628 -0.1739832 -0.15486885
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(biconditions),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
cex.lab.x =0.4,
cex.lab.y=0.4,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)
names(biconditions)
## [1] "HS_N_HO" "HS_noN_HO" "HS_N_LO" "LS_N_HO" "LS_noN_HO" "SW_noN_HO"
## [7] "H2_N_HO" "H2_noN_HO" "H2_N_LO" "H2_noN_LO" "Sulfide" "Oxygen"
## [13] "Nitrogen" "Hydrogen"
Sulfide = as.data.frame(biconditions$Sulfide);
names(Sulfide)[1] <- "Sulfide"
Oxygen <- as.data.frame(biconditions$Oxygen)
names(Oxygen)[1] <- "Oxygen"
Nitrogen <- as.data.frame(biconditions$Nitrogen)
names(Nitrogen)[1] <- "Nitrogen"
Hydrogen <- as.data.frame(biconditions$Hydrogen)
names(Hydrogen)[1] <- "Hydrogen"
H2_N_HO <- as.data.frame(biconditions$H2_N_HO)
names(H2_N_HO)[1] <- "H2_N_HO"
HS_N_LO <- as.data.frame(biconditions$HS_N_LO)
names(HS_N_LO)[1] <- "HS_N_LO"
HS_N_HO <- as.data.frame(biconditions$HS_N_HO)
names(HS_N_HO)[1] <- "HS_N_HO"
HS_noN_HO <- as.data.frame(biconditions$HS_noN_HO)
names(HS_noN_HO)[1] <- "HS_noN_HO"
LS_noN_HO <- as.data.frame(biconditions$LS_noN_HO)
names(LS_noN_HO)[1] <- "LS_noN_HO"
SW_noN_HO <- as.data.frame(biconditions$SW_noN_HO)
names(SW_noN_HO)[1] <- "SW_noN_HO"
LS_N_HO <- as.data.frame(biconditions$LS_N_HO)
names(LS_N_HO)[1] <- "LS_N_HO"
H2_noN_HO <- as.data.frame(biconditions$H2_noN_HO)
names(H2_noN_HO)[1] <- "H2_noN_HO"
H2_N_LO <- as.data.frame(biconditions$H2_N_LO)
names(H2_N_LO)[1] <- "H2_N_LO"
H2_noN_LO <- as.data.frame(biconditions$H2_noN_LO)
names(H2_noN_LO)[1] <- "H2_noN_LO"
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(WGCNA_matrix, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
geneSignificance=as.data.frame(cor(WGCNA_matrix, biconditions, use = "p"))
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSignificance), nSamples))
names(geneSignificance) = paste("GS.", names(biconditions), sep="");
names(GSPvalue) = paste("p.Sul.", names(biconditions), sep="");
str(geneModuleMembership)
## 'data.frame': 2500 obs. of 15 variables:
## $ MMmidnightblue : num -0.298 0.648 0.768 0.625 0.599 ...
## $ MMdarkorchid4 : num -0.474 0.848 0.291 0.445 0.409 ...
## $ MMyellow : num -0.0623 0.163 -0.2172 0.211 -0.0863 ...
## $ MMgreenyellow : num -0.1228 0.3899 0.0313 0.0184 -0.2098 ...
## $ MMblack : num -0.1697 0.5395 0.0647 0.4343 0.2309 ...
## $ MMdarkgreen : num -0.0548 0.4469 -0.0929 0.2286 -0.0154 ...
## $ MMdeeppink3 : num -0.0172 0.4138 0.0872 0.1792 0.0883 ...
## $ MMpink : num -0.016 0.5972 0.1198 0.1043 -0.0477 ...
## $ MMgrey60 : num -0.656 0.183 0.255 0.474 0.511 ...
## $ MMbrown : num -0.138 -0.295 0.237 0.274 0.475 ...
## $ MMplum4 : num 0.648 -0.599 -0.107 -0.237 -0.344 ...
## $ MMskyblue : num 0.247 -0.286 -0.58 -0.626 -0.677 ...
## $ MMdarkcyan : num 0.256 -0.84 -0.471 -0.44 -0.391 ...
## $ MMdarkgoldenrod1: num -0.173 -0.442 -0.579 -0.381 -0.328 ...
## $ MMgrey : num -0.3438 0.3343 -0.1019 0.0722 0.278 ...
str(geneSignificance)
## 'data.frame': 2500 obs. of 14 variables:
## $ GS.HS_N_HO : num -0.119 0.376 -0.219 -0.477 -0.338 ...
## $ GS.HS_noN_HO: num 0.134 0.419 0.675 0.431 0.299 ...
## $ GS.HS_N_LO : num 0.1614 -0.3814 0.0652 -0.2549 -0.0556 ...
## $ GS.LS_N_HO : num 0.0746 0.1922 0.1029 0.0454 -0.0436 ...
## $ GS.LS_noN_HO: num -0.567 0.24 0.114 0.362 0.321 ...
## $ GS.SW_noN_HO: num -0.1715 -0.1254 0.0558 0.1978 0.2951 ...
## $ GS.H2_N_HO : num 0.1176 -0.0198 -0.3035 0.0256 -0.0918 ...
## $ GS.H2_noN_HO: num -0.1839 0.0707 -0.1085 0.2563 0.2329 ...
## $ GS.H2_N_LO : num 0.196 -0.365 -0.251 -0.358 -0.285 ...
## $ GS.H2_noN_LO: num 0.357 -0.406 -0.131 -0.229 -0.334 ...
## $ GS.Sulfide : num 0.1156 0.2704 0.3413 -0.1966 -0.0622 ...
## $ GS.Oxygen : num -0.468 0.754 0.208 0.551 0.442 ...
## $ GS.Nitrogen : num 0.259 -0.119 -0.364 -0.611 -0.489 ...
## $ GS.Hydrogen : num 0.298 -0.441 -0.486 -0.187 -0.293 ...
geneSulfideSignificance=as.data.frame(cor(WGCNA_matrix, Sulfide, use = "p"))
SulPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSulfideSignificance), nSamples))
names(geneSulfideSignificance) = paste("Sul.", names(Sulfide), sep="");
names(SulPvalue) = paste("p.Sul.", names(Sulfide), sep="");
geneOxygenSignificance=as.data.frame(cor(WGCNA_matrix, Oxygen, use = "p"))
OxyPvalue = as.data.frame(corPvalueStudent(as.matrix(geneOxygenSignificance), nSamples))
names(geneOxygenSignificance) = paste("Oxy.", names(Oxygen), sep="");
names(OxyPvalue) = paste("p.Oxy.", names(Oxygen), sep="");
geneNitrogenSignificance=as.data.frame(cor(WGCNA_matrix, Nitrogen, use = "p"))
NitPvalue = as.data.frame(corPvalueStudent(as.matrix(geneNitrogenSignificance), nSamples))
names(geneNitrogenSignificance) = paste("Nit.", names(Nitrogen), sep="");
names(NitPvalue) = paste("p.Nit.", names(Nitrogen), sep="");
geneHydrogenSignificance=as.data.frame(cor(WGCNA_matrix, Hydrogen, use = "p"))
HydPvalue = as.data.frame(corPvalueStudent(as.matrix(geneHydrogenSignificance), nSamples))
names(geneHydrogenSignificance) = paste("Hyd.", names(Hydrogen), sep="");
names(HydPvalue) = paste("p.Hyd.", names(Hydrogen), sep="");
####making dataframe to add to cytoscape layer#########################################
sulfsig <- geneSulfideSignificance
sulfsig$locusID <- rownames(sulfsig)
rownames(sulfsig) <- NULL
sulfsig <- sulfsig[,c(2,1)]
oxysig <- geneOxygenSignificance
oxysig$locusID <- rownames(oxysig)
rownames(oxysig) <- NULL
oxysig <- oxysig[,c(2,1)]
nitrosig <- geneNitrogenSignificance
nitrosig$locusID <- rownames(nitrosig)
rownames(nitrosig) <- NULL
nitrosig <- nitrosig[,c(2,1)]
hydrosig <- geneHydrogenSignificance
hydrosig$locusID <- rownames(hydrosig)
rownames(hydrosig) <- NULL
hydrosig <- hydrosig[,c(2,1)]
sig.genes <- cbind(sulfsig,oxysig$Oxy.Oxygen,nitrosig$Nit.Nitrogen,hydrosig$Hyd.Hydrogen)
names(sig.genes)[2] <- "Sulfide_GS"
names(sig.genes)[3] <- "Oxygen_GS"
names(sig.genes)[4] <- "Nitrogen_GS"
names(sig.genes)[5] <- "Hydrogen_GS"
names(sig.genes)
## [1] "locusID" "Sulfide_GS" "Oxygen_GS" "Nitrogen_GS" "Hydrogen_GS"
#write.csv(sig.genes,"sig.genes.csv")
#########################making datframe to oxygen and sulfide dataframe########################################################
sig.genes3 <- cbind(sulfsig,oxysig$Oxy.Oxygen)
colnames(sig.genes3)
## [1] "locusID" "Sul.Sulfide" "oxysig$Oxy.Oxygen"
colnames(sig.genes3) <- c("locusID","corSulfide","corOxygen")
sulP <- SulPvalue
sulP <- tibble(locusID = rownames(sulP), sulP)
oxyP <- OxyPvalue
oxyP <- tibble(locusID = rownames(oxyP), oxyP)
colnames(sulP)
## [1] "locusID" "p.Sul.Sulfide"
colnames(sulP) <- c("locusID","pval_Sulfide")
colnames(oxyP)
## [1] "locusID" "p.Oxy.Oxygen"
colnames(oxyP) <- c("locusID","pval_Oxygen")
GMM <- geneModuleMembership
GMMP <- MMPvalue
head(sig.genes3)
## locusID corSulfide corOxygen
## 1 gene-L0Y14_RS04070 0.11559790 -0.4677785
## 2 gene-L0Y14_RS09775 0.27041835 0.7541553
## 3 gene-L0Y14_RS01990 0.34126255 0.2075954
## 4 gene-L0Y14_RS09950 -0.19662226 0.5510156
## 5 gene-L0Y14_RS09955 -0.06219137 0.4415475
## 6 gene-L0Y14_RS12365 -0.23047838 0.2361504
head(GMM)
## MMmidnightblue MMdarkorchid4 MMyellow MMgreenyellow
## gene-L0Y14_RS04070 -0.2978002 -0.4742295 -0.06228793 -0.12283434
## gene-L0Y14_RS09775 0.6475847 0.8479211 0.16301606 0.38986765
## gene-L0Y14_RS01990 0.7683016 0.2909617 -0.21720940 0.03127418
## gene-L0Y14_RS09950 0.6250107 0.4452151 0.21104415 0.01837192
## gene-L0Y14_RS09955 0.5993774 0.4088890 -0.08634607 -0.20976909
## gene-L0Y14_RS12365 0.3632049 0.1311813 -0.06645301 -0.38765511
## MMblack MMdarkgreen MMdeeppink3 MMpink MMgrey60
## gene-L0Y14_RS04070 -0.16971124 -0.05481641 -0.01719957 -0.01598375 -0.6563363
## gene-L0Y14_RS09775 0.53953441 0.44692677 0.41381637 0.59720934 0.1830737
## gene-L0Y14_RS01990 0.06471391 -0.09289025 0.08717597 0.11976742 0.2550905
## gene-L0Y14_RS09950 0.43427234 0.22863891 0.17923724 0.10431595 0.4740972
## gene-L0Y14_RS09955 0.23088121 -0.01544034 0.08834925 -0.04765631 0.5107174
## gene-L0Y14_RS12365 -0.00202547 -0.12064102 -0.25455458 -0.31551755 0.7070535
## MMbrown MMplum4 MMskyblue MMdarkcyan
## gene-L0Y14_RS04070 -0.1384809 0.6483345 0.2465064 0.25589117
## gene-L0Y14_RS09775 -0.2948900 -0.5986951 -0.2859013 -0.84047590
## gene-L0Y14_RS01990 0.2365179 -0.1068451 -0.5795192 -0.47056380
## gene-L0Y14_RS09950 0.2737269 -0.2366527 -0.6256319 -0.43994105
## gene-L0Y14_RS09955 0.4747395 -0.3438988 -0.6771603 -0.39111707
## gene-L0Y14_RS12365 0.4675125 -0.2198442 -0.5555037 -0.09885019
## MMdarkgoldenrod1 MMgrey
## gene-L0Y14_RS04070 -0.1725039 -0.34377603
## gene-L0Y14_RS09775 -0.4423334 0.33425164
## gene-L0Y14_RS01990 -0.5786492 -0.10188510
## gene-L0Y14_RS09950 -0.3808909 0.07218213
## gene-L0Y14_RS09955 -0.3278772 0.27804497
## gene-L0Y14_RS12365 -0.2081241 0.06962918
head(GMMP)
## p.MMmidnightblue p.MMdarkorchid4 p.MMyellow p.MMgreenyellow
## gene-L0Y14_RS04070 1.099727e-01 8.107296e-03 0.7436784 0.51785242
## gene-L0Y14_RS09775 1.095424e-04 3.335534e-09 0.3893952 0.03319238
## gene-L0Y14_RS01990 7.146697e-07 1.187803e-01 0.2489174 0.86968629
## gene-L0Y14_RS09950 2.220815e-04 1.368570e-02 0.2629330 0.92323522
## gene-L0Y14_RS09955 4.649887e-04 2.486376e-02 0.6500519 0.26589457
## gene-L0Y14_RS12365 4.852036e-02 4.895817e-01 0.7271658 0.03429251
## p.MMblack p.MMdarkgreen p.MMdeeppink3 p.MMpink
## gene-L0Y14_RS04070 0.369944508 0.77357610 0.92812078 0.9331901555
## gene-L0Y14_RS09775 0.002091438 0.01328534 0.02301156 0.0004935721
## gene-L0Y14_RS01990 0.734046428 0.62539413 0.64690412 0.5284364628
## gene-L0Y14_RS09950 0.016490067 0.22426245 0.34327557 0.5832907609
## gene-L0Y14_RS09955 0.219626475 0.93545678 0.64246404 0.8025293779
## gene-L0Y14_RS12365 0.991524596 0.52541108 0.17462403 0.0894278762
## p.MMgrey60 p.MMbrown p.MMplum4 p.MMskyblue
## gene-L0Y14_RS04070 8.200365e-05 0.465519128 0.0001068964 1.891247e-01
## gene-L0Y14_RS09775 3.328721e-01 0.113659304 0.0004738236 1.256266e-01
## gene-L0Y14_RS01990 1.736871e-01 0.208261116 0.5741429330 7.907558e-04
## gene-L0Y14_RS09950 8.127494e-03 0.143281237 0.2079944552 2.179639e-04
## gene-L0Y14_RS09955 3.928636e-03 0.008029824 0.0627743027 3.961873e-05
## gene-L0Y14_RS12365 1.253228e-05 0.009188101 0.2430811504 1.438928e-03
## p.MMdarkcyan p.MMdarkgoldenrod1 p.MMgrey
## gene-L0Y14_RS04070 1.722940e-01 0.3620032361 0.06287441
## gene-L0Y14_RS09775 6.204607e-09 0.0143824230 0.07102686
## gene-L0Y14_RS01990 8.682956e-03 0.0008087425 0.59214321
## gene-L0Y14_RS09950 1.498308e-02 0.0378395675 0.70464811
## gene-L0Y14_RS09955 3.258386e-02 0.0769249135 0.13682488
## gene-L0Y14_RS12365 6.032765e-01 0.2697473140 0.71465331
dim(sig.genes3)
## [1] 2500 3
####before I can merge these I have to change the rownames to columns
# Assuming your datasets are named df1, df2, df3
# Convert row names to column for df2 and df3
GMM <- tibble(locusID = rownames(GMM), GMM)
GMMP <- tibble(locusID = rownames(GMMP), GMMP)
colnames(GMM) <- c("locusID", "GM_MEpurple","GM_MEmidnightblue", "GM_MElightyellow","GM_MEgreenyellow","GM_MEblack","GM_MEgreen","GM_MEcherry","GM_MEpink","GM_MEbrown","GM_MEgrey60", "GM_MElightpurple","GM_MEskyblue","GM_MEteal","GM_MEgold","GM_MEgrey")
colnames(GMMP) <- c("locusID", "GMPval_MEpurple","GMPval_MEmidnightblue", "GMPval_MElightyellow","GMPval_MEgreenyellow","GMPval_MEblack","GMPval_MEgreen","GMPval_MEcherry","GMPval_MEpink","GMPval_MEbrown","GMPval_MEgrey60", "GMPval_MElightpurple","GMPval_MEskyblue","GMPval_MEteal","GMPval_MEgold","GMPval_MEgrey")
merged_df <- merge(sig.genes3, sulP, by = "locusID", all.x = TRUE)
merged_df2 <- merge(merged_df, oxyP, by = "locusID", all.x = TRUE)
merged_df3 <- merge(merged_df2, GMM, by = "locusID", all.x = TRUE)
merged_df4 <- merge(merged_df3, GMMP, by = "locusID", all.x = TRUE)
#write.csv(merged_df4,"GM_GS_sulfide_oxygen.csv")
##################################################################################################################
names(modNames)
## NULL
moduleColors = mergedColors
table(moduleColors)
## moduleColors
## black brown darkcyan darkgoldenrod1 darkgreen
## 124 233 239 99 143
## darkorchid4 deeppink3 greenyellow grey grey60
## 100 214 160 242 203
## midnightblue pink plum4 skyblue yellow
## 83 385 109 120 46
module = "midnightblue"
column = match(module, modNames);
moduleGenes = moduleColors==module;
module2 = "brown"
column2 = match(module2, modNames);
moduleGenes2 = moduleColors==module2;
module3 = "darkgoldenrod1"
column3 = match(module3, modNames);
moduleGenes3 = moduleColors==module3;
module4 = "pink"
column4 = match(module4, modNames);
moduleGenes4 = moduleColors==module4;
module5 = "black"
column5 = match(module5, modNames);
moduleGenes5 = moduleColors==module5;
module6 = "deeppink3"
column6 = match(module6, modNames);
moduleGenes6 = moduleColors==module6;
module7 = "darkcyan"
column7 = match(module7, modNames);
moduleGenes7 = moduleColors==module7;
module8 = "darkgreen"
column8 = match(module8, modNames);
moduleGenes8 = moduleColors==module8;
module9 = "greenyellow"
column9 = match(module9, modNames);
moduleGenes9 = moduleColors==module9;
module10 = "grey60"
column10 = match(module10, modNames);
moduleGenes10 = moduleColors==module10;
module11 = "yellow"
column11 = match(module11, modNames);
moduleGenes11 = moduleColors==module11;
module12 = "skyblue"
column12 = match(module12, modNames);
moduleGenes12 = moduleColors==module12;
module13 = "plum4"
column13 = match(module13, modNames);
moduleGenes13 = moduleColors==module13;
module14 = "darkorchid4"
column14 = match(module14, modNames);
moduleGenes14 = moduleColors==module14;
module15 = "grey"
column15 = match(module15, modNames);
moduleGenes15 = moduleColors==module15;
#sizeGrWindow(7, 7); # uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))
#midnight blue and brown
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneSulfideSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Sulfide",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneOxygenSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneHydrogenSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneNitrogenSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
abs(geneSulfideSignificance[moduleGenes2, 1]),
xlab = paste("Module Membership in", module2, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
abs(geneOxygenSignificance[moduleGenes2, 1]),
xlab = paste("Module Membership in", module2, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
abs(geneHydrogenSignificance[moduleGenes2, 1]),
xlab = paste("Module Membership in", module2, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
abs(geneNitrogenSignificance[moduleGenes2, 1]),
xlab = paste("Module Membership in", module2, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
abline=TRUE)
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))
####gold and pink
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
abs(geneSulfideSignificance[moduleGenes3, 1]),
xlab = paste("Module Membership in", module3, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
abs(geneOxygenSignificance[moduleGenes3, 1]),
xlab = paste("Module Membership in", module3, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
abs(geneHydrogenSignificance[moduleGenes3, 1]),
xlab = paste("Module Membership in", module3, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
abs(geneNitrogenSignificance[moduleGenes3, 1]),
xlab = paste("Module Membership in", module3, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
abline=TRUE)
#pink
verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
abs(geneSulfideSignificance[moduleGenes4, 1]),
xlab = paste("Module Membership in", module4, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
abs(geneOxygenSignificance[moduleGenes4, 1]),
xlab = paste("Module Membership in", module4, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
abs(geneHydrogenSignificance[moduleGenes4, 1]),
xlab = paste("Module Membership in", module4, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
abs(geneNitrogenSignificance[moduleGenes4, 1]),
xlab = paste("Module Membership in", module4, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
abline=TRUE)
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))
####black and blue
verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
abs(geneSulfideSignificance[moduleGenes5, 1]),
xlab = paste("Module Membership in", module5, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
abs(geneOxygenSignificance[moduleGenes5, 1]),
xlab = paste("Module Membership in", module5, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
abs(geneHydrogenSignificance[moduleGenes5, 1]),
xlab = paste("Module Membership in", module5, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
abs(geneNitrogenSignificance[moduleGenes5, 1]),
xlab = paste("Module Membership in", module5, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
abline=TRUE)
#blue
verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
abs(geneSulfideSignificance[moduleGenes6, 1]),
xlab = paste("Module Membership in", module6, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
abs(geneOxygenSignificance[moduleGenes6, 1]),
xlab = paste("Module Membership in", module6, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
abs(geneHydrogenSignificance[moduleGenes6, 1]),
xlab = paste("Module Membership in", module6, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
abs(geneNitrogenSignificance[moduleGenes6, 1]),
xlab = paste("Module Membership in", module6, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
abline=TRUE)
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))
# darkcyan and green (7 and 8)
#darkcyan
verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
abs(geneSulfideSignificance[moduleGenes7, 1]),
xlab = paste("Module Membership in", module7, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
abs(geneOxygenSignificance[moduleGenes7, 1]),
xlab = paste("Module Membership in", module7, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
abs(geneHydrogenSignificance[moduleGenes7, 1]),
xlab = paste("Module Membership in", module7, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
abs(geneNitrogenSignificance[moduleGenes7, 1]),
xlab = paste("Module Membership in", module7, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
abline=TRUE)
#green
verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
abs(geneSulfideSignificance[moduleGenes8, 1]),
xlab = paste("Module Membership in", module8, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
abs(geneOxygenSignificance[moduleGenes8, 1]),
xlab = paste("Module Membership in", module8, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
abs(geneHydrogenSignificance[moduleGenes8, 1]),
xlab = paste("Module Membership in", module8, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
abs(geneNitrogenSignificance[moduleGenes8, 1]),
xlab = paste("Module Membership in", module8, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
abline=TRUE)
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))
# greenyellow and grey60
#greenyellow
verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
abs(geneSulfideSignificance[moduleGenes9, 1]),
xlab = paste("Module Membership in", module9, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
abs(geneOxygenSignificance[moduleGenes9, 1]),
xlab = paste("Module Membership in", module9, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
abs(geneHydrogenSignificance[moduleGenes9, 1]),
xlab = paste("Module Membership in", module9, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
abs(geneNitrogenSignificance[moduleGenes9, 1]),
xlab = paste("Module Membership in", module9, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
abline=TRUE)
#grey60
verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
abs(geneSulfideSignificance[moduleGenes10, 1]),
xlab = paste("Module Membership in", module10, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
abs(geneOxygenSignificance[moduleGenes10, 1]),
xlab = paste("Module Membership in", module10, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
abs(geneHydrogenSignificance[moduleGenes10, 1]),
xlab = paste("Module Membership in", module10, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
abs(geneNitrogenSignificance[moduleGenes10, 1]),
xlab = paste("Module Membership in", module10, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
abline=TRUE)
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))
#lightyellow and magenta (11,12)
#lightyellow
verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
abs(geneSulfideSignificance[moduleGenes11, 1]),
xlab = paste("Module Membership in", module11, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
abs(geneOxygenSignificance[moduleGenes11, 1]),
xlab = paste("Module Membership in", module11, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
abs(geneHydrogenSignificance[moduleGenes11, 1]),
xlab = paste("Module Membership in", module11, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
abs(geneNitrogenSignificance[moduleGenes11, 1]),
xlab = paste("Module Membership in", module11, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
abline=TRUE)
#magenta
verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
abs(geneSulfideSignificance[moduleGenes12, 1]),
xlab = paste("Module Membership in", module12, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
abs(geneOxygenSignificance[moduleGenes12, 1]),
xlab = paste("Module Membership in", module12, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
abs(geneHydrogenSignificance[moduleGenes12, 1]),
xlab = paste("Module Membership in", module12, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
abs(geneNitrogenSignificance[moduleGenes12, 1]),
xlab = paste("Module Membership in", module12, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
abline=TRUE)
str(geneSulfideSignificance)
## 'data.frame': 2500 obs. of 1 variable:
## $ Sul.Sulfide: num 0.1156 0.2704 0.3413 -0.1966 -0.0622 ...
what <- geneSulfideSignificance
what$locusID <- rownames(what)
what <- what[,c(2,1)]
# Writing the dataframe to a CSV file so I can use this in cytoscape for a layer
##write.csv(what, file = "geneSulfideSignificance.csv", row.names = FALSE)
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))
#purple and salmon (13,14)
#purple
verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
abs(geneSulfideSignificance[moduleGenes13, 1]),
xlab = paste("Module Membership in", module13, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
abs(geneOxygenSignificance[moduleGenes13, 1]),
xlab = paste("Module Membership in", module13, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
abs(geneHydrogenSignificance[moduleGenes13, 1]),
xlab = paste("Module Membership in", module13, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
abs(geneNitrogenSignificance[moduleGenes13, 1]),
xlab = paste("Module Membership in", module13, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
abline=TRUE)
#salmon
verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
abs(geneSulfideSignificance[moduleGenes14, 1]),
xlab = paste("Module Membership in", module14, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
abs(geneOxygenSignificance[moduleGenes14, 1]),
xlab = paste("Module Membership in", module14, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
abs(geneHydrogenSignificance[moduleGenes14, 1]),
xlab = paste("Module Membership in", module14, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
abs(geneNitrogenSignificance[moduleGenes14, 1]),
xlab = paste("Module Membership in", module14, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
abline=TRUE)
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))
#grey
verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
abs(geneSulfideSignificance[moduleGenes15, 1]),
xlab = paste("Module Membership in", module15, "module"),
ylab = "Sulfide",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
abs(geneOxygenSignificance[moduleGenes15, 1]),
xlab = paste("Module Membership in", module15, "module"),
ylab = "Oxygen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
abs(geneHydrogenSignificance[moduleGenes15, 1]),
xlab = paste("Module Membership in", module15, "module"),
ylab = "Hydrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
abs(geneNitrogenSignificance[moduleGenes15, 1]),
xlab = paste("Module Membership in", module15, "module"),
ylab = "Nitrogen",
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
abline=TRUE)
######now I am just making the plots that will go next to the gene significance barplots that I made in the previous section, I will do the two most significant modules for each treatment condition
### for sulfide that is teal and gold
#sizeGrWindow(3,5);
par(mfrow = c(1,2))
par(mar=c(4,3,4,3))
verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
abs(geneSulfideSignificance[moduleGenes7, 1]),
xlab = paste("Module Membership in teal module"),
ylab = "Sulfide",
pch = 16,
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
abs(geneSulfideSignificance[moduleGenes3, 1]),
xlab = paste("Module Membership in gold module"),
ylab = "Sulfide",
pch = 16,
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
abline=TRUE)
#sizeGrWindow(3,5);
par(mfrow = c(1,2))
par(mar=c(4,3,4,3))
#### for oxygen in black and purple modules
verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
abs(geneOxygenSignificance[moduleGenes14, 1]),
xlab = paste("Module Membership in purple"),
ylab = "Oxygen",
pch = 16,
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
abline = TRUE, xaxt = "n") # Suppress x-axis
verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
abs(geneOxygenSignificance[moduleGenes5, 1]),
xlab = paste("Module Membership in black"),
ylab = "oxygen",
pch = 16,
cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
abline=TRUE)
####genes of significance in teal module (darkcyan) under Sulfide
#geneSulfideSignificance
FilterGenes= abs(geneSulfideSignificance)> .2 & abs(geneModuleMembership$MMdarkcyan)>.8 & SulPvalue <= 0.05
table(FilterGenes)
## FilterGenes
## FALSE TRUE
## 2450 50
head(FilterGenes)
## Sul.Sulfide
## gene-L0Y14_RS04070 FALSE
## gene-L0Y14_RS09775 FALSE
## gene-L0Y14_RS01990 FALSE
## gene-L0Y14_RS09950 FALSE
## gene-L0Y14_RS09955 FALSE
## gene-L0Y14_RS12365 FALSE
Sulfideteal_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Sulfideteal_filtered <- gsub("e.L", "e-L", Sulfideteal_filtered)
gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"
Sulfideteal_filtered <- as.data.frame(Sulfideteal_filtered)
names(Sulfideteal_filtered)[1] <- "locusID"
str(Sulfideteal_filtered)
## 'data.frame': 50 obs. of 1 variable:
## $ locusID: chr "gene-L0Y14_RS07425" "gene-L0Y14_RS07440" "gene-L0Y14_RS07430" "gene-L0Y14_RS07435" ...
Sulfideteal_filtered <- left_join(Sulfideteal_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Sulfideteal_filtered)
## [1] 50 2
names(Sulfideteal_filtered)
## [1] "locusID" "gene"
list <- 1:50
condition <- rep("Sulfide",length(list))
moduleFil <- rep("teal",length(list))
a <- cbind(Sulfideteal_filtered,condition,moduleFil)
sul.teal <- a
####genes of significance in gold module under Sulfide
#geneSulfideSignificance
FilterGenes= abs(geneSulfideSignificance)> .2 & abs(geneModuleMembership$MMdarkgoldenrod1)>.8 & SulPvalue <= 0.05
table(FilterGenes)
## FilterGenes
## FALSE TRUE
## 2478 22
Sulfidegold_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Sulfidegold_filtered <- gsub("e.L", "e-L", Sulfidegold_filtered)
gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"
Sulfidegold_filtered <- as.data.frame(Sulfidegold_filtered)
names(Sulfidegold_filtered)[1] <- "locusID"
str(Sulfidegold_filtered)
## 'data.frame': 22 obs. of 1 variable:
## $ locusID: chr "gene-L0Y14_RS01985" "gene-L0Y14_RS07695" "gene-L0Y14_RS00080" "gene-L0Y14_RS12030" ...
Sulfidegold_filtered <- left_join(Sulfidegold_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Sulfidegold_filtered)
## [1] 22 2
names(Sulfidegold_filtered)
## [1] "locusID" "gene"
list <- 1:22
condition <- rep("Sulfide",length(list))
moduleFil <- rep("darkgoldenrod1",length(list))
d <- cbind(Sulfidegold_filtered,condition,moduleFil)
sulfide_hub_gold <- d
####genes of significance in black module under Oxygen
#geneOxygenSignificance
FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMblack)>.8 & OxyPvalue <= 0.05
table(FilterGenes)
## FilterGenes
## FALSE TRUE
## 2467 33
head(FilterGenes)
## Oxy.Oxygen
## gene-L0Y14_RS04070 FALSE
## gene-L0Y14_RS09775 FALSE
## gene-L0Y14_RS01990 FALSE
## gene-L0Y14_RS09950 FALSE
## gene-L0Y14_RS09955 FALSE
## gene-L0Y14_RS12365 FALSE
Oxygenblack_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Oxygenblack_filtered <- gsub("e.L", "e-L", Oxygenblack_filtered)
gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"
Oxygenblack_filtered <- as.data.frame(Oxygenblack_filtered)
names(Oxygenblack_filtered)[1] <- "locusID"
str(Oxygenblack_filtered)
## 'data.frame': 33 obs. of 1 variable:
## $ locusID: chr "gene-L0Y14_RS14085" "gene-L0Y14_RS12670" "gene-L0Y14_RS08400" "gene-L0Y14_RS05880" ...
Oxygenblack_filtered <- left_join(Oxygenblack_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Oxygenblack_filtered)
## [1] 33 2
names(Oxygenblack_filtered)
## [1] "locusID" "gene"
list <- 1:33
condition <- rep("Oxygen",length(list))
moduleFil <- rep("black",length(list))
oxy_b <- cbind(Oxygenblack_filtered,condition,moduleFil)
#####
names(geneModuleMembership)
## [1] "MMmidnightblue" "MMdarkorchid4" "MMyellow" "MMgreenyellow"
## [5] "MMblack" "MMdarkgreen" "MMdeeppink3" "MMpink"
## [9] "MMgrey60" "MMbrown" "MMplum4" "MMskyblue"
## [13] "MMdarkcyan" "MMdarkgoldenrod1" "MMgrey"
names(geneModuleMembership)
## [1] "MMmidnightblue" "MMdarkorchid4" "MMyellow" "MMgreenyellow"
## [5] "MMblack" "MMdarkgreen" "MMdeeppink3" "MMpink"
## [9] "MMgrey60" "MMbrown" "MMplum4" "MMskyblue"
## [13] "MMdarkcyan" "MMdarkgoldenrod1" "MMgrey"
####genes of significance in purple (darkorchid) module under Oxygen
#geneOxygenSignificance
names(geneModuleMembership)
## [1] "MMmidnightblue" "MMdarkorchid4" "MMyellow" "MMgreenyellow"
## [5] "MMblack" "MMdarkgreen" "MMdeeppink3" "MMpink"
## [9] "MMgrey60" "MMbrown" "MMplum4" "MMskyblue"
## [13] "MMdarkcyan" "MMdarkgoldenrod1" "MMgrey"
FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMdarkorchid4)>.8 & OxyPvalue <= 0.05
table(FilterGenes)
## FilterGenes
## FALSE TRUE
## 2476 24
Oxygenpurple_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Oxygenpurple_filtered <- gsub("e.L", "e-L", Oxygenpurple_filtered)
gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"
Oxygenpurple_filtered <- as.data.frame(Oxygenpurple_filtered)
names(Oxygenpurple_filtered)[1] <- "locusID"
str(Oxygenpurple_filtered)
## 'data.frame': 24 obs. of 1 variable:
## $ locusID: chr "gene-L0Y14_RS09775" "gene-L0Y14_RS16350" "gene-L0Y14_RS12700" "gene-L0Y14_RS07425" ...
Oxygenpurple_filtered <- left_join(Oxygenpurple_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Oxygenpurple_filtered)
## [1] 24 2
names(Oxygenpurple_filtered)
## [1] "locusID" "gene"
list <- 1:24
condition <- rep("Oxygen",length(list))
moduleFil <- rep("darkorchid4",length(list))
oxy_purple <- cbind(Oxygenpurple_filtered,condition,moduleFil)
#####
names(geneModuleMembership)
## [1] "MMmidnightblue" "MMdarkorchid4" "MMyellow" "MMgreenyellow"
## [5] "MMblack" "MMdarkgreen" "MMdeeppink3" "MMpink"
## [9] "MMgrey60" "MMbrown" "MMplum4" "MMskyblue"
## [13] "MMdarkcyan" "MMdarkgoldenrod1" "MMgrey"
####genes of significance in purple module under Oxygen
FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMdarkorchid4)>.8
table(FilterGenes)
## FilterGenes
## FALSE TRUE
## 2476 24
Oxygenpurple_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Oxygenpurple_filtered <- gsub("e.L", "e-L", Oxygenpurple_filtered)
gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"
Oxygenpurple_filtered <- as.data.frame(Oxygenpurple_filtered)
names(Oxygenpurple_filtered)[1] <- "locusID"
str(Oxygenpurple_filtered)
## 'data.frame': 24 obs. of 1 variable:
## $ locusID: chr "gene-L0Y14_RS09775" "gene-L0Y14_RS16350" "gene-L0Y14_RS12700" "gene-L0Y14_RS07425" ...
Oxygenpurple_filtered <- left_join(Oxygenpurple_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Oxygenpurple_filtered)
## [1] 24 2
names(Oxygenpurple_filtered)
## [1] "locusID" "gene"
list <- 1:24
condition <- rep("Oxygen",length(list))
moduleFil <- rep("purple",length(list))
oxy_purple <- cbind(Oxygenpurple_filtered,condition,moduleFil)
lnames = load(file = "wgcna_manual_network20220910.RData")
lnames =load(file="count_matrix_condition_info_dataInput.RData")
TOM = TOMsimilarityFromExpr(WGCNA_matrix, power = 8,
networkType = "signed hybrid",
TOMType = "signed")
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
moduleColors = mergedColors
###below we will just use all genes, but module=c("list of color modules you could filter by","eg.black","red")
#module= moduleColors
#module2=c("brown","cyan","midnightblue","blue","magenta","turquoise","black","salmon","pink","green","turqoise")
inModule = is.finite(match(moduleColors, moduleColors))
#inModule2 = is.finite(match(moduleColors, module2))
#table(inModule2)
genes = colnames(WGCNA_matrix)
modGenes = genes[inModule]
#modGenes2=genes[inModule2]
modTOM = TOM[inModule, inModule]
#modTOM2 = TOM[inModule2,inModule2]
dimnames(modTOM) = list(modGenes, modGenes)
#dimnames(modTOM2) = list(modGenes2, modGenes2)
genes_of_note <- read.csv("enzymes_of_note.csv")
names(genes_of_note)
## [1] "broad_function" "function." "gene"
## [4] "locus_ID_andre_new" "X"
genes_of_note <- genes_of_note[,1:4]
names(genes_of_note)[4] <- "modGenes"
cool <- genes_of_note[,c(4,3,2,1)]
write.csv(cool,"cool_genes.csv")
modGenes3 <- as.data.frame(modGenes)
names(modGenes3)[1] <- "modGenes"
#modGenes4 <- as.data.frame(modGenes2)
#names(modGenes4)[1] <- "modGenes"
#str(modGenes4)
gene_annos <- right_join(modGenes3,cool)
## Joining with `by = join_by(modGenes)`
#gene_annos2 <- right_join(modGenes4,cool)
names(gene_annos)
## [1] "modGenes" "gene" "function." "broad_function"
cool_gene_list <- gene_annos[,1]
cool_gene_list <- as.data.frame(cool_gene_list)
names(cool_gene_list)[1] <- "modGenes"
#names(gene_annos2)
#cool_gene_list2 <- gene_annos2[,1]
#cool_gene_list2 <- as.data.frame(cool_gene_list2)
#names(cool_gene_list2)[1] <- "modGenes"
modcool <- inner_join(modGenes3,cool_gene_list)
## Joining with `by = join_by(modGenes)`
#modcool2 <- inner_join(modGenes4,cool_gene_list2)
uncool <- modGenes3[ ! modGenes3$modGenes %in% modcool$modGenes,]
#uncool2 <- modGenes4[ ! modGenes4$modGenes %in% modcool2$modGenes,]
uncool <- as.data.frame(uncool)
#uncool2 <- as.data.frame(uncool2)
names(uncool)[1] <- "modGenes"
#names(uncool2)[1] <- "modGenes"
from_gff <- read.delim("GCF_023733635.1_ASM2373363v1_feature_table.txt")
from_gff$locus_tag = paste0('gene-', from_gff$locus_tag)
names(from_gff)[17] <- "modGenes"
names(from_gff)
## [1] "X..feature" "class"
## [3] "assembly" "assembly_unit"
## [5] "seq_type" "chromosome"
## [7] "genomic_accession" "start"
## [9] "end" "strand"
## [11] "product_accession" "non.redundant_refseq"
## [13] "related_accession" "name"
## [15] "symbol" "GeneID"
## [17] "modGenes" "feature_interval_length"
## [19] "product_length" "attributes"
names(from_gff)[1] <- "feature"
gff2 <- subset(from_gff,feature=="CDS")
names(gff2)
## [1] "feature" "class"
## [3] "assembly" "assembly_unit"
## [5] "seq_type" "chromosome"
## [7] "genomic_accession" "start"
## [9] "end" "strand"
## [11] "product_accession" "non.redundant_refseq"
## [13] "related_accession" "name"
## [15] "symbol" "GeneID"
## [17] "modGenes" "feature_interval_length"
## [19] "product_length" "attributes"
gff3 <- gff2[,c(17,14)]
gff4 <- gff2[,c(17,14,11)]
coolg <- left_join(modcool,cool)
## Joining with `by = join_by(modGenes)`
coolg <- left_join(coolg,gff4)
## Joining with `by = join_by(modGenes)`
coolg <- coolg[,c(1,2,6)]
#coolg2 <- left_join(modcool2,cool)
#coolg2 <- left_join(coolg2,gff4)
#coolg2 <- coolg2[,c(1,2,6)]
str(gff4)
## 'data.frame': 3213 obs. of 3 variables:
## $ modGenes : chr "gene-L0Y14_RS00005" "gene-L0Y14_RS00010" "gene-L0Y14_RS00015" "gene-L0Y14_RS00020" ...
## $ name : chr "chromosomal replication initiator protein DnaA" "DNA polymerase III subunit beta" "IS3 family transposase" "hypothetical protein" ...
## $ product_accession: chr "WP_005958812.1" "WP_006474992.1" "" "WP_138921860.1" ...
#str(uncool2)
#names(uncool)[1] <- "modGenes"
uncoolg <- left_join(uncool,gff3)
## Joining with `by = join_by(modGenes)`
#uncoolg2 <- left_join(uncool2,gff4)
names(uncoolg)[2] <- "gene"
coolg <- coolg[,1:2]
all_genes6 <- rbind(coolg,uncoolg)
#names(uncool2)[1] <- "modGenes"
#uncoolg2 <- left_join(uncool2,gff3)
#uncoolg3 <- left_join(uncool2,gff4)
#names(uncoolg3)[2] <- "gene"
#names(coolg2)
all_genes <- rbind(coolg,uncoolg)
write.csv(all_genes6,"gene_names_for_wgcna_matrix_files_sig_module.csv")
vec1 <- all_genes6[,1]
vec2 <- modGenes
head(vec1)
## [1] "gene-L0Y14_RS04070" "gene-L0Y14_RS01990" "gene-L0Y14_RS12365"
## [4] "gene-L0Y14_RS06850" "gene-L0Y14_RS02365" "gene-L0Y14_RS07425"
reorder_idx <- match(vec2,vec1)
all_genes <- vec1[reorder_idx]
all_genes3 <- as.data.frame(all_genes)
names(all_genes3)[1] <- "modGenes"
genes2 <- left_join(all_genes3,all_genes6)
## Joining with `by = join_by(modGenes)`
genes777 <- genes2[,2]
genes888 <- genes2[,2]
dimnames(modTOM) = list(all_genes, all_genes)
#str(modGenes2)
#moduleColors[inModule]
#head(modTOM2)
#str(modTOM2)
head(all_genes)
## [1] "gene-L0Y14_RS04070" "gene-L0Y14_RS09775" "gene-L0Y14_RS01990"
## [4] "gene-L0Y14_RS09950" "gene-L0Y14_RS09955" "gene-L0Y14_RS12365"
#str(all_genes2)
set.seed(123)
###all_genes2 needs to be a list not dataframe!
cyt_nofilter=cyt = exportNetworkToCytoscape(modTOM,
edgeFile = "CytoscapeEdgeFile20230201.txt",
nodeFile = "CytoscapeNodeFile20230201.txt",
weighted = TRUE,
threshold = 0.0,
nodeNames = all_genes,
altNodeNames = genes777,
nodeAttr = moduleColors[inModule])
cyt_all = exportNetworkToCytoscape(modTOM,
edgeFile = "CytoscapeEdgeFile20231201_0.05.txt",
nodeFile = "CytoscapeNodeFile20231201_0.05.txt",
weighted = TRUE,
threshold = 0.05,
nodeNames = all_genes,
altNodeNames = genes888,
nodeAttr = moduleColors[inModule])
file_path <- "CytoscapeNodeFile20231201_0.05.txt"
lines <- readLines(file_path)
cleaned_data <- list()
column_names <- NULL
# Loop through the lines and process data as needed
for (line in lines) {
# Split the line using tabs as delimiters
elements <- unlist(strsplit(line, "\t"))
if (is.null(column_names)) {
# If column names are not set, use the first line as column names
column_names <- elements
} else {
# Process elements as needed (e.g., remove extra data, format, etc.)
# Append cleaned data to the list
cleaned_data <- append(cleaned_data, list(elements))
}
}
final_data <- as.data.frame(do.call(rbind, cleaned_data), stringsAsFactors = FALSE)
colnames(final_data) <- column_names
table(final_data[,3])
##
## black brown darkcyan darkgoldenrod1 darkgreen
## 98 196 214 79 130
## darkorchid4 deeppink3 greenyellow grey grey60
## 71 199 136 32 155
## midnightblue pink plum4 skyblue yellow
## 59 337 92 106 41
# Replace all instances of color names to match those used in paper in the entire data frame
final_data <- lapply(final_data, function(x) ifelse(x == "darkorchid4", "purple", x))
final_data <- as.data.frame(final_data)
final_data <- lapply(final_data, function(x) ifelse(x == "plum4", "light_purple", x))
final_data <- as.data.frame(final_data)
final_data <- lapply(final_data, function(x) ifelse(x == "darkcyan", "teal", x))
final_data <- as.data.frame(final_data)
final_data <- lapply(final_data, function(x) ifelse(x == "deeppink3", "cherry", x))
final_data <- as.data.frame(final_data)
final_data <- lapply(final_data, function(x) ifelse(x == "yellow", "light_yellow", x))
final_data <- as.data.frame(final_data)
final_data <- lapply(final_data, function(x) ifelse(x == "darkgoldenrod1", "gold", x))
final_data <- as.data.frame(final_data)
final_data <- lapply(final_data, function(x) ifelse(x == "darkgreen","green", x))
final_data <- as.data.frame(final_data)
#write.table(final_data, file = "output_file.txt", sep = "\t", quote = FALSE, row.names = FALSE)
###mean_significance_of_module_for_conditionSulfide############################################################
par(mfrow = c(2,1))
colorh1 <- mergedColors
GS1=as.numeric(cor(biconditions$Sulfide,WGCNA_matrix, use="p"))
head(GS1)
## [1] 0.11559790 0.27041835 0.34126255 -0.19662226 -0.06219137 -0.23047838
GeneSignificance=abs(GS1)
head(GeneSignificance)
## [1] 0.11559790 0.27041835 0.34126255 0.19662226 0.06219137 0.23047838
str(colorh1)
## chr [1:2500] "plum4" "darkorchid4" "midnightblue" "grey60" "grey60" ...
ModuleSignificance=tapply(GeneSignificance, colorh1, mean, na.rm=T)
str(ModuleSignificance)
## num [1:15(1d)] 0.138 0.193 0.309 0.368 0.133 ...
## - attr(*, "dimnames")=List of 1
## ..$ : chr [1:15] "black" "brown" "darkcyan" "darkgoldenrod1" ...
geneSulfideSignificance=as.data.frame(cor(WGCNA_matrix, Sulfide, use = "p"))
SulPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSulfideSignificance), nSamples))
names(geneSulfideSignificance) = paste("Sul.", names(Sulfide), sep="");
names(SulPvalue) = paste("p.Sul.", names(Sulfide), sep="");
x_limits <- range(
abs(geneModuleMembership[moduleGenes7, column7]),
abs(geneModuleMembership[moduleGenes3, column3])
)
head(geneModuleMembership)
## MMmidnightblue MMdarkorchid4 MMyellow MMgreenyellow
## gene-L0Y14_RS04070 -0.2978002 -0.4742295 -0.06228793 -0.12283434
## gene-L0Y14_RS09775 0.6475847 0.8479211 0.16301606 0.38986765
## gene-L0Y14_RS01990 0.7683016 0.2909617 -0.21720940 0.03127418
## gene-L0Y14_RS09950 0.6250107 0.4452151 0.21104415 0.01837192
## gene-L0Y14_RS09955 0.5993774 0.4088890 -0.08634607 -0.20976909
## gene-L0Y14_RS12365 0.3632049 0.1311813 -0.06645301 -0.38765511
## MMblack MMdarkgreen MMdeeppink3 MMpink MMgrey60
## gene-L0Y14_RS04070 -0.16971124 -0.05481641 -0.01719957 -0.01598375 -0.6563363
## gene-L0Y14_RS09775 0.53953441 0.44692677 0.41381637 0.59720934 0.1830737
## gene-L0Y14_RS01990 0.06471391 -0.09289025 0.08717597 0.11976742 0.2550905
## gene-L0Y14_RS09950 0.43427234 0.22863891 0.17923724 0.10431595 0.4740972
## gene-L0Y14_RS09955 0.23088121 -0.01544034 0.08834925 -0.04765631 0.5107174
## gene-L0Y14_RS12365 -0.00202547 -0.12064102 -0.25455458 -0.31551755 0.7070535
## MMbrown MMplum4 MMskyblue MMdarkcyan
## gene-L0Y14_RS04070 -0.1384809 0.6483345 0.2465064 0.25589117
## gene-L0Y14_RS09775 -0.2948900 -0.5986951 -0.2859013 -0.84047590
## gene-L0Y14_RS01990 0.2365179 -0.1068451 -0.5795192 -0.47056380
## gene-L0Y14_RS09950 0.2737269 -0.2366527 -0.6256319 -0.43994105
## gene-L0Y14_RS09955 0.4747395 -0.3438988 -0.6771603 -0.39111707
## gene-L0Y14_RS12365 0.4675125 -0.2198442 -0.5555037 -0.09885019
## MMdarkgoldenrod1 MMgrey
## gene-L0Y14_RS04070 -0.1725039 -0.34377603
## gene-L0Y14_RS09775 -0.4423334 0.33425164
## gene-L0Y14_RS01990 -0.5786492 -0.10188510
## gene-L0Y14_RS09950 -0.3808909 0.07218213
## gene-L0Y14_RS09955 -0.3278772 0.27804497
## gene-L0Y14_RS12365 -0.2081241 0.06962918
# First Plot (plotModuleSignificance)
plotModuleSignificance(GeneSignificance, colorh1,
legend.text = TRUE,
main = "Average gene significance in modules with sulfide variable",
cex.axis = 1)
help(plotModuleSignificance)
table(colorh1)
## colorh1
## black brown darkcyan darkgoldenrod1 darkgreen
## 124 233 239 99 143
## darkorchid4 deeppink3 greenyellow grey grey60
## 100 214 160 242 203
## midnightblue pink plum4 skyblue yellow
## 83 385 109 120 46
par(mar = c(2, 3, 2, 3))
par(mfrow = c(2,1))
###mean_significance_of_module_for_condition_Oxygen#########################################################
sizeGrWindow(8,7)
par(mar = c(2, 3, 2, 3))
par(mfrow = c(2,1))
GS1=as.numeric(cor(biconditions$Oxygen,WGCNA_matrix, use="p"))
GeneSignificance=abs(GS1)
ModuleSignificance=tapply(GeneSignificance, colorh1, mean, na.rm=T)
#sizeGrWindow(8,7)
#par(mfrow = c(1,3))
plotModuleSignificance(GeneSignificance,colorh1,
legend.text=TRUE,
main = "Gene significance in oxygen treatments across modules",
cex.axis=1)
help(plotModuleSignificance)
# Plot for the purple module with suppressed x-axis
#y_limits <- range(
# abs(geneOxygenSignificance[moduleGenes14, 1]),
#abs(geneOxygenSignificance[moduleGenes5, 1])
#)
x_limits <- range(abs(geneModuleMembership[moduleGenes14, column14]),
abs(geneModuleMembership[moduleGenes5, column5]))
nitro <- read.csv("Merged Network_2 default node.csv")
#names(nitro)
nirS <- subset(nitro,nirS=="1")
#nitro$shared.name
#(nitro$neighbors666)
rtca6 <-nitro[,c(133,69)]
nitro1 <- nitro[c(1:55,57:894),c(133,69,106,3,28,38,39,65,66,108,109,120,121,125)]
#nitro1$shared.name
#names(nitro1)
names(nitro1)[3] <- "cbb"
nitro1$cbb <-factor(ifelse(nitro1$cbb=="cbb",1,0))
table(nitro1$cbb)
##
## 0 1
## 373 520
names(nitro1)[2] <- "rtca"
nitro1$rtca <-factor(ifelse(nitro1$rtca=="rtca",1,0))
table(nitro1$rtca)
##
## 0 1
## 745 148
indx <- sapply(nitro1, is.factor)
nitro1[indx] <- lapply(nitro1[indx], function(x) as.numeric(as.character(x)))
str(nitro1)
## 'data.frame': 893 obs. of 14 variables:
## $ shared.name : chr "gene-L0Y14_RS16060" "gene-L0Y14_RS01710" "gene-L0Y14_RS01760" "gene-L0Y14_RS01755" ...
## $ rtca : num 0 0 0 0 0 0 0 0 0 0 ...
## $ cbb : num 1 1 1 1 1 1 1 1 1 0 ...
## $ amtB : int 1 1 1 1 1 1 1 1 1 1 ...
## $ gogat_glnA : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hoa1 : int 1 NA NA 1 NA 1 NA NA 1 NA ...
## $ hoa2 : int NA NA NA NA NA 1 1 NA NA NA ...
## $ nap : int NA NA NA NA NA NA NA NA NA NA ...
## $ narGHI : int NA NA NA NA NA NA NA NA 1 NA ...
## $ nirBD_nasA_nark1: int 1 NA NA 1 1 NA NA NA 1 NA ...
## $ nirS : int NA NA NA NA NA NA 1 NA NA NA ...
## $ norCB : int NA NA NA NA NA NA NA NA NA NA ...
## $ nosZ_cluster : int NA NA NA NA NA NA NA NA NA NA ...
## $ otr : int NA NA NA NA NA 1 1 NA 1 NA ...
nitro1[is.na(nitro1)] = 0
indx <- sapply(nitro1, is.factor)
nitro1[indx] <- lapply(nitro1[indx], function(x) as.numeric(as.character(x)))
upset(nitro1,sets = c("rtca","cbb" ,"amtB","gogat_glnA","hoa1","hoa2","otr","nirBD_nasA_nark1","nap","narGHI","nirS","norCB","nosZ_cluster"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE, order)
letslook <- read.csv("aa_cbb_rtca_merged_with_fixed_rtca_cbb default node.csv")
#names(letslook)
letslook <- letslook[1:1052,c(131,60:109)]
rtca1 <- subset(letslook, neighbors1000 =="rtca")
#rtca1$shared.name
#names(letslook)
table(letslook$neighbors1000)
##
## rtca
## 904 148
names(letslook)[5] <- "rtca"
letslook$rtca <-factor(ifelse(letslook$rtca=="rtca",1,0))
table(letslook$rtca)
##
## 0 1
## 904 148
#names(letslook)
table(letslook$neighbhors184)
##
## lysine_synthesis
## 836 216
which(colnames(letslook)=="neighbhors184")
## [1] 3
names(letslook)[which(colnames(letslook)=="neighbhors184")] <- "lysine_synthesis"
letslook$lysine_synthesis <-factor(ifelse(letslook$lysine_synthesis=="lysine_synthesis",1,0))
table(letslook$lysine_synthesis)
##
## 0 1
## 836 216
#names(letslook)
table(letslook$neighbors666)
##
## cbb
## 532 520
which(colnames(letslook)=="neighbors666")
## [1] 51
names(letslook)[which(colnames(letslook)=="neighbors666")] <- "cbb"
letslook$cbb <-factor(ifelse(letslook$cbb=="cbb",1,0))
table(letslook$cbb)
##
## 0 1
## 532 520
#names(letslook)
table(letslook$neighbors185)
##
## arginine_synthesis
## 1010 42
names(letslook)[which(colnames(letslook)=="neighbors185")] <- "arginine_synthesis"
letslook$arginine_synthesis <-factor(ifelse(letslook$arginine_synthesis=="arginine_synthesis",1,0))
table(letslook$arginine_synthesis)
##
## 0 1
## 1010 42
#names(letslook)
table(letslook$neighbors183)
##
## ornithine_synthesis
## 648 404
names(letslook)[which(colnames(letslook)=="neighbors183")] <- "ornithine_synthesis"
letslook$ornithine_synthesis <-factor(ifelse(letslook$ornithine_synthesis=="ornithine_synthesis",1,0))
table(letslook$ornithine_synthesis)
##
## 0 1
## 648 404
#names(letslook)
table(letslook$neighbors182)
##
## citruline_synthesis
## 941 111
names(letslook)[which(colnames(letslook)=="neighbors182")] <- "citruline_synthesis"
letslook$citruline_synthesis <-factor(ifelse(letslook$citruline_synthesis=="citruline_synthesis",1,0))
table(letslook$citruline_synthesis)
##
## 0 1
## 941 111
#names(letslook)
table(letslook$neighbors181)
##
## asparagine_synth
## 756 296
names(letslook)[which(colnames(letslook)=="neighbors181")] <- "asparagine_synth"
letslook$asparagine_synth <-factor(ifelse(letslook$asparagine_synth=="asparagine_synth",1,0))
table(letslook$asparagine_synth)
##
## 0 1
## 756 296
#names(letslook)
table(letslook$neighbors186)
##
## shikimate
## 674 378
names(letslook)[which(colnames(letslook)=="neighbors186")] <- "shikimate"
letslook$shikimate <-factor(ifelse(letslook$shikimate=="shikimate",1,0))
table(letslook$shikimate)
##
## 0 1
## 674 378
#names(letslook)
table(letslook$neighbors187)
##
## tryptophan_synthesis
## 961 91
names(letslook)[which(colnames(letslook)=="neighbors187")] <- "tryptophan_synthesis"
letslook$tryptophan_synthesis <-factor(ifelse(letslook$tryptophan_synthesis=="tryptophan_synthesis",1,0))
table(letslook$tryptophan_synthesis)
##
## 0 1
## 961 91
#names(letslook)
table(letslook$neighbors189)
##
## phenylalanine_tyrosine_synth
## 1029 23
names(letslook)[which(colnames(letslook)=="neighbors189")] <- "phenylalanine_tyrosine_synth"
letslook$phenylalanine_tyrosine_synth <-factor(ifelse(letslook$phenylalanine_tyrosine_synth=="phenylalanine_tyrosine_synth",1,0))
table(letslook$phenylalanine_tyrosine_synth)
##
## 0 1
## 1029 23
#names(letslook)
table(letslook$neighbors200)
##
## histidine_synthesis
## 836 216
names(letslook)[which(colnames(letslook)=="neighbors200")] <- "histidine_synthesis"
letslook$histidine_synthesis <-factor(ifelse(letslook$histidine_synthesis=="histidine_synthesis",1,0))
table(letslook$histidine_synthesis)
##
## 0 1
## 836 216
#names(letslook)
table(letslook$neighbors202)
##
## valine_synthesis
## 1003 49
names(letslook)[which(colnames(letslook)=="neighbors202")] <- "valine_synthesis"
letslook$valine_synthesis <-factor(ifelse(letslook$valine_synthesis=="valine_synthesis",1,0))
table(letslook$valine_synthesis)
##
## 0 1
## 1003 49
#names(letslook)
table(letslook$neighbors190)
##
## luecine_isoluecine_synthesis
## 1002 50
names(letslook)[which(colnames(letslook)=="neighbors190")] <- "luecine_isoluecine_synthesis"
letslook$luecine_isoluecine_synthesis <-factor(ifelse(letslook$luecine_isoluecine_synthesis=="luecine_isoluecine_synthesis",1,0))
table(letslook$luecine_isoluecine_synthesis)
##
## 0 1
## 1002 50
#names(letslook)
indx <- sapply(letslook, is.factor)
letslook[indx] <- lapply(letslook[indx], function(x) as.numeric(as.character(x)))
upset(letslook,sets = c("rtca","cbb" ,"lysine_synthesis","asparagine_synth","citruline_synthesis","ornithine_synthesis","arginine_synthesis","shikimate","tryptophan_synthesis","phenylalanine_tyrosine_synth","luecine_isoluecine_synthesis","histidine_synthesis","valine_synthesis"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)
sulfy <- read.csv("cfix_sulfide_merged_binary default node.csv")
#names(sulfy)
sulfy1 <- sulfy[1:890,c(131:135,127,129,5,22,27,68,107)]
which(colnames(sulfy1)=="sbp")
## [1] 7
#names(sulfy1)
names(sulfy1)[12] <- "cbb"
sulfy1$cbb <-factor(ifelse(sulfy1$cbb=="cbb",1,0))
table(sulfy1$cbb)
##
## 0 1
## 370 520
names(sulfy1)[11] <- "rtca"
sulfy1$rtca <-factor(ifelse(sulfy1$rtca=="rtca",1,0))
table(sulfy1$rtca)
##
## 0 1
## 741 149
indx <- sapply(sulfy1, is.factor)
sulfy1[indx] <- lapply(sulfy1[indx], function(x) as.numeric(as.character(x)))
#str(sulfy1)
sulfy1[is.na(sulfy1)] = 0
indx <- sapply(sulfy1, is.factor)
sulfy1[indx] <- lapply(sulfy1[indx], function(x) as.numeric(as.character(x)))
#names(sulfy1)
upset(sulfy1,sets = c("rtca","cbb" ,"sqr","fccA","sbp","sox","soe","sreA","dsr_tusA","apr_sat","qmoABhdrCB"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)
electron <- read.csv("rtca_cbb_hyd_etc_atpases_merged_binary default node.csv")
#names(electron)
electro1 <- electron[,c(94,68,70,12,25,38,39,63,84,86,106,107)]
#names(electro1)
names(electro1)[3] <- "cbb"
electro1$cbb <-factor(ifelse(electro1$cbb=="cbb",1,0))
table(electro1$cbb)
##
## 0 1
## 485 520
names(electro1)[2] <- "rtca"
electro1$rtca <-factor(ifelse(electro1$rtca=="rtca",1,0))
table(electro1$rtca)
##
## 0 1
## 856 149
indx <- sapply(electro1, is.factor)
electro1[indx] <- lapply(electro1[indx], function(x) as.numeric(as.character(x)))
#str(electro1)
electro1[is.na(electro1)] = 0
indx <- sapply(electro1, is.factor)
electro1[indx] <- lapply(electro1[indx], function(x) as.numeric(as.character(x)))
#names(electro1)
upset(electro1,sets = c("rtca","cbb" ,"nuo","pet","cco","mbx","F1atpase","V1atpase","V2atpase","hyd1e","hyd3b"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)
de<-read.csv('entotal_202200815_use_this_corrected_locus_tags.csv',header=TRUE,stringsAsFactors=FALSE)
sig<-de[de$adj.P.Val<=0.05,]
sig0<-sig[sig$logFC<=-1,]
sig1<-sig[sig$logFC>=1,]
sig<-rbind(sig0,sig1)
a<-subset(sig,comparison=='LS_vs_HS_noN_HO')
b<-subset(sig,comparison=='LS_vs_HS_N_HO')
c<-subset(sig,comparison=='H2_vs_HS_N_HO')
d<-subset(sig,comparison=='H2_vs_HS_noN_HO')
e<-subset(sig,comparison=='SW_vs_HS_noN_HO')
f<-subset(sig,comparison=='H2_vs_LS_N_HO')
g<-subset(sig,comparison=='H2_vs_LS_noN_HO')
h<-subset(sig,comparison=='SW_vs_LS_noN_HO')
#i<-subset(sig,comparison=='LO_vs_HO_H2_N')no de!!!
j <- subset(sig,comparison=='LO_vs_HO_H2_noN')
k <- subset(sig,comparison=='LO_vs_HO_HS_N')
l <- subset(sig,comparison=='noN_vs_N_H2_HO')
m <- subset(sig,comparison=='noN_vs_N_HS_HO')
n <- subset(sig,comparison=='noN_vs_N_LS_HO')
sulfide<-rbind(a,b,c,d,e,f,g,h)
no_hydrogen_sulfide<-rbind(a,b,e,h)
sulfide_nolowSvsH2 <- rbind(a,b,c,d,e)
oxygen <- rbind(j,k)
nitrogen <- rbind(l,m,n)
my.colors.sulfide<-
scale_color_manual(values=c(
'LS_vs_HS_noN_HO'='#A982AA',
'LS_vs_HS_N_HO'='#8E4162',
'H2_vs_HS_N_HO'='#404E7C',
'H2_vs_HS_noN_HO'='#6C91BF',
'SW_vs_HS_noN_HO'='#EEA58C',
'H2_vs_LS_N_HO'='#7EA172',
'H2_vs_LS_noN_HO'='#C7CB85',
'SW_vs_LS_noN_HO'='#000000'))
my.colors.oxygen <-
scale_color_manual(values=c(
'LO_vs_HO_HS_N'="#022b3a",
'LO_vs_HO_H2_noN'="#2a9d8f"))
my.colors.nitrogen <-
scale_color_manual(values=c(
'noN_vs_N_H2_HO'="#264653",
'noN_vs_N_HS_HO'="#2a9d8f",
'noN_vs_N_LS_HO'="#e9c46a"))
my.colors.sulfide_nolowSvsH2 <-
scale_color_manual(values=c(
'LS_vs_HS_noN_HO'='#A982AA',
'LS_vs_HS_N_HO'='#8E4162',
'H2_vs_HS_N_HO'='#404E7C',
'H2_vs_HS_noN_HO'='#6C91BF',
'SW_vs_HS_noN_HO'='#EEA58C'))
my.theme <- theme(axis.title= element_text(face = "plain",size = 20),
axis.text = element_text(face = "plain",size = 15, colour = "black"),
plot.caption = element_text(hjust = 0),
panel.background=element_rect(fill="white"),
panel.border=element_rect(colour="black",fill=NA,linewidth = 1),
axis.text.x = element_text(angle = 90),
axis.line.x.top = element_blank(),
)
#####setup for individual comparisons
sig$Threshold<-cut(sig$logFC,breaks=c(-Inf,1,Inf),labels=c("<=1",">1"))
LS_vs_HS_noN_HO <-subset(sig,comparison=='LS_vs_HS_noN_HO')
LS_vs_HS_N_HO <-subset(sig,comparison=='LS_vs_HS_N_HO')
H2_vs_HS_N_HO <-subset(sig,comparison=='H2_vs_HS_N_HO')
H2_vs_HS_noN_HO <-subset(sig,comparison=='H2_vs_HS_noN_HO')
SW_vs_HS_noN_HO <-subset(sig,comparison=='SW_vs_HS_noN_HO')
LO_vs_HO_H2_noN<- subset(sig,comparison=='LO_vs_HO_H2_noN')
LO_vs_HO_HS_N <- subset(sig,comparison=='LO_vs_HO_HS_N')
H2_vs_LS_N_HO<-subset(sig,comparison=='H2_vs_LS_N_HO')
H2_vs_LS_noN_HO<-subset(sig,comparison=='H2_vs_LS_noN_HO')
SW_vs_LS_noN_HO<-subset(sig,comparison=='SW_vs_LS_noN_HO')
noN_vs_N_H2_HO <- subset(sig,comparison=='noN_vs_N_H2_HO')
noN_vs_N_HS_HO <- subset(sig,comparison=='noN_vs_N_HS_HO')
noN_vs_N_LS_HO <- subset(sig,comparison=='noN_vs_N_LS_HO')
my.colors.ind.comparisons <-
scale_color_manual(values=c(
'>1'='#F2444D',
'<=1'='#3BD4CC'))
###### sulfide in gold
names(sulfide_hub_gold)
## [1] "locusID" "gene" "condition" "moduleFil"
names(sulfide_hub_gold)[2] <- "Predicted_function"
names(sulfide_nolowSvsH2)
## [1] "X.1" "locus_ID_andre_new"
## [3] "geneid" "locus_ID_andre_old"
## [5] "start.x" "stop.x"
## [7] "gene_letters_andre" "locus_tag_andre"
## [9] "accession_andre" "start"
## [11] "stop" "notes"
## [13] "Ontology" "Inference"
## [15] "locus_tag" "product"
## [17] "protein_id" "gene_letters.notes"
## [19] "NCBI_ID" "annotations_from_andre"
## [21] "target_id" "Accession"
## [23] "Predicted_function" "functional_role"
## [25] "sub_role" "sub_role2"
## [27] "Protein" "Accession...Protein"
## [29] "seed_ortholog_evalue" "seed_ortholog_score"
## [31] "best_tax_level" "Preferred_name"
## [33] "GOs" "X"
## [35] "KEGG_ko" "KEGG_Pathway"
## [37] "KEGG_Module" "KEGG_Reaction"
## [39] "KEGG_rclass" "BRITE"
## [41] "KEGG_TC" "CAZy"
## [43] "BiGG_Reaction" "tax_scope"
## [45] "eggNOG_OGs" "bestOG"
## [47] "COG_Functional_Category" "eggNOG_free_text_description"
## [49] "logFC" "AveExpr"
## [51] "t" "P.value"
## [53] "adj.P.Val" "B"
## [55] "comparison"
names(sulfide_nolowSvsH2)[2] <- "locusID"
names(sulfide_nolowSvsH2)
## [1] "X.1" "locusID"
## [3] "geneid" "locus_ID_andre_old"
## [5] "start.x" "stop.x"
## [7] "gene_letters_andre" "locus_tag_andre"
## [9] "accession_andre" "start"
## [11] "stop" "notes"
## [13] "Ontology" "Inference"
## [15] "locus_tag" "product"
## [17] "protein_id" "gene_letters.notes"
## [19] "NCBI_ID" "annotations_from_andre"
## [21] "target_id" "Accession"
## [23] "Predicted_function" "functional_role"
## [25] "sub_role" "sub_role2"
## [27] "Protein" "Accession...Protein"
## [29] "seed_ortholog_evalue" "seed_ortholog_score"
## [31] "best_tax_level" "Preferred_name"
## [33] "GOs" "X"
## [35] "KEGG_ko" "KEGG_Pathway"
## [37] "KEGG_Module" "KEGG_Reaction"
## [39] "KEGG_rclass" "BRITE"
## [41] "KEGG_TC" "CAZy"
## [43] "BiGG_Reaction" "tax_scope"
## [45] "eggNOG_OGs" "bestOG"
## [47] "COG_Functional_Category" "eggNOG_free_text_description"
## [49] "logFC" "AveExpr"
## [51] "t" "P.value"
## [53] "adj.P.Val" "B"
## [55] "comparison"
sulfide_comp_DE <- sulfide_nolowSvsH2[,c(2,3,5,6,7,9:14,16:55)]
shg_vector <- sulfide_hub_gold[,1]
shg_vector
## [1] "gene-L0Y14_RS01985" "gene-L0Y14_RS07695" "gene-L0Y14_RS00080"
## [4] "gene-L0Y14_RS12030" "gene-L0Y14_RS12025" "gene-L0Y14_RS12035"
## [7] "gene-L0Y14_RS11990" "gene-L0Y14_RS11995" "gene-L0Y14_RS12020"
## [10] "gene-L0Y14_RS12015" "gene-L0Y14_RS12045" "gene-L0Y14_RS12000"
## [13] "gene-L0Y14_RS11980" "gene-L0Y14_RS11975" "gene-L0Y14_RS11965"
## [16] "gene-L0Y14_RS12040" "gene-L0Y14_RS11985" "gene-L0Y14_RS12010"
## [19] "gene-L0Y14_RS11255" "gene-L0Y14_RS10315" "gene-L0Y14_RS10410"
## [22] "gene-L0Y14_RS12005"
shg_DE <- subset(sulfide_comp_DE, locusID %in% shg_vector)
shg_DE_names_added <- shg_DE %>%
left_join(sulfide_hub_gold, by = "locusID", suffix = c("", "_sulfide_hub_gold"))
names(shg_DE_names_added)
## [1] "locusID" "geneid"
## [3] "start.x" "stop.x"
## [5] "gene_letters_andre" "accession_andre"
## [7] "start" "stop"
## [9] "notes" "Ontology"
## [11] "Inference" "product"
## [13] "protein_id" "gene_letters.notes"
## [15] "NCBI_ID" "annotations_from_andre"
## [17] "target_id" "Accession"
## [19] "Predicted_function" "functional_role"
## [21] "sub_role" "sub_role2"
## [23] "Protein" "Accession...Protein"
## [25] "seed_ortholog_evalue" "seed_ortholog_score"
## [27] "best_tax_level" "Preferred_name"
## [29] "GOs" "X"
## [31] "KEGG_ko" "KEGG_Pathway"
## [33] "KEGG_Module" "KEGG_Reaction"
## [35] "KEGG_rclass" "BRITE"
## [37] "KEGG_TC" "CAZy"
## [39] "BiGG_Reaction" "tax_scope"
## [41] "eggNOG_OGs" "bestOG"
## [43] "COG_Functional_Category" "eggNOG_free_text_description"
## [45] "logFC" "AveExpr"
## [47] "t" "P.value"
## [49] "adj.P.Val" "B"
## [51] "comparison" "Predicted_function_sulfide_hub_gold"
## [53] "condition" "moduleFil"
tesstes <- shg_DE_names_added[,c(19,28,52)]
shg_DE2 <- shg_DE_names_added[,c(1:18,20:54)]
names(shg_DE2)
## [1] "locusID" "geneid"
## [3] "start.x" "stop.x"
## [5] "gene_letters_andre" "accession_andre"
## [7] "start" "stop"
## [9] "notes" "Ontology"
## [11] "Inference" "product"
## [13] "protein_id" "gene_letters.notes"
## [15] "NCBI_ID" "annotations_from_andre"
## [17] "target_id" "Accession"
## [19] "functional_role" "sub_role"
## [21] "sub_role2" "Protein"
## [23] "Accession...Protein" "seed_ortholog_evalue"
## [25] "seed_ortholog_score" "best_tax_level"
## [27] "Preferred_name" "GOs"
## [29] "X" "KEGG_ko"
## [31] "KEGG_Pathway" "KEGG_Module"
## [33] "KEGG_Reaction" "KEGG_rclass"
## [35] "BRITE" "KEGG_TC"
## [37] "CAZy" "BiGG_Reaction"
## [39] "tax_scope" "eggNOG_OGs"
## [41] "bestOG" "COG_Functional_Category"
## [43] "eggNOG_free_text_description" "logFC"
## [45] "AveExpr" "t"
## [47] "P.value" "adj.P.Val"
## [49] "B" "comparison"
## [51] "Predicted_function_sulfide_hub_gold" "condition"
## [53] "moduleFil"
names(shg_DE2)[51] <- "Predicted_function"
#write_csv(shg_DE2,"shg_DE2.csv")
###now I have to reorder this nonsense
list(unique(shg_DE2$locusID))
## [[1]]
## [1] "gene-L0Y14_RS10410" "gene-L0Y14_RS00080" "gene-L0Y14_RS01985"
## [4] "gene-L0Y14_RS07695" "gene-L0Y14_RS11995" "gene-L0Y14_RS11990"
## [7] "gene-L0Y14_RS11985" "gene-L0Y14_RS11965" "gene-L0Y14_RS10315"
## [10] "gene-L0Y14_RS11255" "gene-L0Y14_RS11980" "gene-L0Y14_RS12045"
## [13] "gene-L0Y14_RS12000" "gene-L0Y14_RS11975" "gene-L0Y14_RS12005"
## [16] "gene-L0Y14_RS12015" "gene-L0Y14_RS12020" "gene-L0Y14_RS12010"
## [19] "gene-L0Y14_RS12040" "gene-L0Y14_RS12030" "gene-L0Y14_RS12025"
## [22] "gene-L0Y14_RS12035"
list(unique(shg_DE2$Predicted_function))
## [[1]]
## [1] "leuC" "nitrogen regulation protein NR(II)"
## [3] "nirB" "frdB_1"
## [5] "flxD" "hdrA"
## [7] "korC" "hdrB"
## [9] "U32 family peptidase" "DNA repair protein RecN"
## [11] "korB_1" "mdh_1"
## [13] "flxC" "korA_1"
## [15] "flxB" "aclB_full"
## [17] "aclA" "flxA"
## [19] "pntB" "acnA"
## [21] "idh" "pntA"
order <- c("hdrB","korA_1","korB_1","korC","hdrA","flxD","flxC","flxB","flxA","aclB_full","aclA","idh","acnA","pntA","pntB","mdh_1","frdB_1","nitrogen regulation protein NR(II)","nirB","leuC","U32 family peptidase", "DNA repair protein RecN")
shg_DE.plot <-ggplot(shg_DE2,
aes(x=factor(Predicted_function,levels = order), y=logFC,color=comparison))
shg_DE.plot +
ggtitle("Hub genes in gold module")+
geom_jitter(size=5,shape=18, width = 0.32, height = 0.32)+
geom_vline(xintercept = c(1.5,2.5,3.5, 4.5, 5.5,6.5,7.5,8.5,9.5,10.5,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5), color = "grey") +
my.colors.sulfide_nolowSvsH2 +
my.theme
####now doing this for sulfide_teal, the otehr module that is significant for sulfide
names(sul.teal)
## [1] "locusID" "gene" "condition" "moduleFil"
names(sul.teal)[2] <- "Predicted_function"
sht_vector <- sul.teal[,1]
#sht_vector
sht_DE <- subset(sulfide_comp_DE, locusID %in% sht_vector)
#names(sht_DE)
sht_DE_names_added <- sht_DE %>%
left_join(sul.teal, by = "locusID", suffix = c("", "_sulfide_hub_teal"))
#names(sht_DE_names_added)
sht_DE2 <- sht_DE_names_added[,c(1:18,20:54)]
#names(sht_DE2)
names(sht_DE2)[51] <- "Predicted_function"
names_shtDE <- unique(sht_DE$locusID)
setdiff(names_shtDE,sht_vector)
## character(0)
### shortening some of the names
#write.csv(sht_DE2,"sht_DE2.csv")
getwd()
## [1] "/Users/jessicamitchell/Library/CloudStorage/OneDrive-HarvardUniversity/OD_documents/Cfixpaper"
###pulling up dataframe with gene names and functions added
sht_DE3 <- read.csv("sht_DE2.csv")
sht_DE3.plot <-ggplot(sht_DE3,
aes(x=factor(Preferred_name), y=logFC,color=comparison))
sht_DE3.plot +
ggtitle("Hub genes in teal module")+
geom_point(size=5,shape=18) +
my.colors.sulfide_nolowSvsH2 +
my.theme
###Going to have to add my own short names to make above figure run right...
###### oxygen in purple
names(oxy_purple)
## [1] "locusID" "gene" "condition" "moduleFil"
names(oxy_purple)[2] <- "Predicted_function"
names(oxygen)[2] <- "locusID"
ohp_vector <- oxy_purple[,1]
#ohp_vector
ohp_DE <- subset(oxygen, locusID %in% ohp_vector)
#names(ohp_DE)
###just the locusID and Preferred_name
ohp_DE1 <- ohp_DE[,c(1,6,28)]
ohp_DE2 <- unique(ohp_DE1)
ohp3 <- merge(ohp_DE2, oxy_purple)
names(ohp3)[3] <- "short.names"
#names(ohp_DE)
#write_csv(ohp3, "ohp4.csv")
#added short.names and notes
ohp4 <- read.csv("ohp4.csv")
ohp_DE3 <- merge(ohp_DE,ohp4, by = "locusID")
ohp_DE.plot <-ggplot(ohp_DE3,
aes(x=factor(short.names), y=logFC,color=comparison))
ohp_DE.plot +
ggtitle("Hub genes in purple module")+
geom_point(size=5,shape=18) +
my.colors.oxygen +
my.theme
###### oxygen in black
#names(oxy_b)
names(oxy_b)[2] <- "Predicted_function"
ohb_vector <- oxy_b[,1]
ohb_vector
## [1] "gene-L0Y14_RS14085" "gene-L0Y14_RS12670" "gene-L0Y14_RS08400"
## [4] "gene-L0Y14_RS05880" "gene-L0Y14_RS13180" "gene-L0Y14_RS15950"
## [7] "gene-L0Y14_RS14595" "gene-L0Y14_RS13130" "gene-L0Y14_RS14970"
## [10] "gene-L0Y14_RS03820" "gene-L0Y14_RS07270" "gene-L0Y14_RS04045"
## [13] "gene-L0Y14_RS03375" "gene-L0Y14_RS14700" "gene-L0Y14_RS11330"
## [16] "gene-L0Y14_RS15890" "gene-L0Y14_RS06760" "gene-L0Y14_RS00450"
## [19] "gene-L0Y14_RS08420" "gene-L0Y14_RS10875" "gene-L0Y14_RS00105"
## [22] "gene-L0Y14_RS01330" "gene-L0Y14_RS16130" "gene-L0Y14_RS02180"
## [25] "gene-L0Y14_RS08830" "gene-L0Y14_RS06620" "gene-L0Y14_RS01145"
## [28] "gene-L0Y14_RS11140" "gene-L0Y14_RS14660" "gene-L0Y14_RS09840"
## [31] "gene-L0Y14_RS03075" "gene-L0Y14_RS06585" "gene-L0Y14_RS08700"
ohb_DE <- subset(oxygen, locusID %in% ohb_vector)
#names(ohb_DE)
###just the locusID,accession and Preferred_name
ohb <- ohb_DE[,c(2,9,20,32)]
ohb <- unique(ohb)
names(ohb)[4] <- "short.names"
#write_csv(ohb, "ohb2.csv")
#added short.names and notes
ohb2 <- read.csv("ohb2.csv")
ohb2_DE3 <- merge(ohb_DE,ohb2, by = "locusID")
ohb2_DE3.plot <-ggplot(ohb2_DE3,
aes(x=factor(short.names), y=logFC,color=comparison))
ohb2_DE3.plot +
ggtitle("Hub genes in black module")+
geom_point(size=5,shape=18) +
my.colors.oxygen +
my.theme
######putting all of this together, so that I have a workable file I can use in adobe illustrator
all_together <- read.csv("wgcna_trait_module_hub_genes.csv")
names(all_together)
## [1] "locusID" "short.names" "moduleFil" "func_category"
## [5] "X"
all_together <- all_together[,1:4]
shg_DE_av <- shg_DE2[,c(1,44,50,53)]
head(shg_DE_av)
## locusID logFC comparison moduleFil
## 1 gene-L0Y14_RS10410 -6.607549 LS_vs_HS_noN_HO darkgoldenrod1
## 2 gene-L0Y14_RS00080 -3.742340 LS_vs_HS_noN_HO darkgoldenrod1
## 3 gene-L0Y14_RS01985 -5.245126 LS_vs_HS_noN_HO darkgoldenrod1
## 4 gene-L0Y14_RS07695 4.361391 LS_vs_HS_noN_HO darkgoldenrod1
## 5 gene-L0Y14_RS11995 2.598792 LS_vs_HS_noN_HO darkgoldenrod1
## 6 gene-L0Y14_RS11990 2.739793 LS_vs_HS_noN_HO darkgoldenrod1
# Calculate the average logFC for each locusID in gold sulfide
avg_logFC_df <- shg_DE_av %>%
group_by(locusID, moduleFil) %>%
summarise(
avg_logFC = mean(logFC, na.rm = TRUE),
sd_logFC = sd(logFC, na.rm = TRUE)
) %>%
ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
avg_logFC_df <- shg_DE_av %>%
group_by(locusID, moduleFil) %>%
summarise(
n = n(), # Add the count of non-NA values
avg_logFC = mean(logFC, na.rm = TRUE),
sd_logFC = sd(logFC, na.rm = TRUE)
) %>%
ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 5
## locusID moduleFil n avg_logFC sd_logFC
## <chr> <chr> <int> <dbl> <dbl>
## 1 gene-L0Y14_RS00080 darkgoldenrod1 3 -4.58 0.732
## 2 gene-L0Y14_RS01985 darkgoldenrod1 3 -5.89 0.580
## 3 gene-L0Y14_RS07695 darkgoldenrod1 4 5.48 1.11
## 4 gene-L0Y14_RS10315 darkgoldenrod1 3 2.38 0.487
## 5 gene-L0Y14_RS10410 darkgoldenrod1 3 -5.97 0.737
## 6 gene-L0Y14_RS11255 darkgoldenrod1 4 2.70 0.991
avg_logFC_df <- avg_logFC_df %>%
mutate(treatment_logFC_ave = "logFC_sulfide_limiting")
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 6
## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
## <chr> <chr> <int> <dbl> <dbl> <chr>
## 1 gene-L0Y14_RS00080 darkgoldenrod1 3 -4.58 0.732 logFC_sulfide_limi…
## 2 gene-L0Y14_RS01985 darkgoldenrod1 3 -5.89 0.580 logFC_sulfide_limi…
## 3 gene-L0Y14_RS07695 darkgoldenrod1 4 5.48 1.11 logFC_sulfide_limi…
## 4 gene-L0Y14_RS10315 darkgoldenrod1 3 2.38 0.487 logFC_sulfide_limi…
## 5 gene-L0Y14_RS10410 darkgoldenrod1 3 -5.97 0.737 logFC_sulfide_limi…
## 6 gene-L0Y14_RS11255 darkgoldenrod1 4 2.70 0.991 logFC_sulfide_limi…
shg_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
#write.csv(shg_DE_av2,"shg_DE_av2.csv")
dim(shg_DE_av2)
## [1] 22 9
n_val_sug <- shg_DE_av2[,c(1,3)]
####added "broad_category" column
shg_DE_av3 <- read.csv("shg_DE_av2.csv")
dim(shg_DE_av3)
## [1] 22 10
####now doing this for the teal sulfide module
#names(sht_DE3)
v <- unique(sht_DE3$locusID)
sht_DE3_av <- sht_DE3[,c(2,45,51,54)]
head(sht_DE3_av)
## locusID logFC comparison moduleFil
## 1 gene-L0Y14_RS00400 -5.671978 LS_vs_HS_noN_HO teal
## 2 gene-L0Y14_RS01755 -1.093415 LS_vs_HS_noN_HO teal
## 3 gene-L0Y14_RS04610 -1.158590 LS_vs_HS_noN_HO teal
## 4 gene-L0Y14_RS05075 -1.192507 LS_vs_HS_noN_HO teal
## 5 gene-L0Y14_RS05080 -2.083575 LS_vs_HS_noN_HO teal
## 6 gene-L0Y14_RS07060 -1.072405 LS_vs_HS_noN_HO teal
avg_logFC_df <- sht_DE3_av %>%
group_by(locusID, moduleFil) %>%
summarise(
avg_logFC = mean(logFC, na.rm = TRUE),
sd_logFC = sd(logFC, na.rm = TRUE)
) %>%
ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
avg_logFC_df <- sht_DE3_av %>%
group_by(locusID, moduleFil) %>%
summarise(
n = n(), # Add the count of non-NA values
avg_logFC = mean(logFC, na.rm = TRUE),
sd_logFC = sd(logFC, na.rm = TRUE)
) %>%
ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 5
## locusID moduleFil n avg_logFC sd_logFC
## <chr> <chr> <int> <dbl> <dbl>
## 1 gene-L0Y14_RS00400 teal 4 -6.35 0.877
## 2 gene-L0Y14_RS00515 teal 3 1.86 0.534
## 3 gene-L0Y14_RS00525 teal 4 2.52 0.570
## 4 gene-L0Y14_RS00625 teal 3 1.86 0.534
## 5 gene-L0Y14_RS00655 teal 4 2.62 0.768
## 6 gene-L0Y14_RS01660 teal 3 -2.42 0.0345
avg_logFC_df <- avg_logFC_df %>%
mutate(treatment_logFC_ave = "logFC_sulfide_limiting")
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 6
## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
## <chr> <chr> <int> <dbl> <dbl> <chr>
## 1 gene-L0Y14_RS00400 teal 4 -6.35 0.877 logFC_sulfide_limiting
## 2 gene-L0Y14_RS00515 teal 3 1.86 0.534 logFC_sulfide_limiting
## 3 gene-L0Y14_RS00525 teal 4 2.52 0.570 logFC_sulfide_limiting
## 4 gene-L0Y14_RS00625 teal 3 1.86 0.534 logFC_sulfide_limiting
## 5 gene-L0Y14_RS00655 teal 4 2.62 0.768 logFC_sulfide_limiting
## 6 gene-L0Y14_RS01660 teal 3 -2.42 0.0345 logFC_sulfide_limiting
sht_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
dim(sht_DE_av2)
## [1] 50 9
names(sht_DE_av2)
## [1] "locusID" "moduleFil.x" "n"
## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
## [7] "short.names" "moduleFil.y" "func_category"
sht_n <- sht_DE_av2[,c(1,3)]
#write.csv(sht_DE_av2,"sht_DE_av2.csv")
#write.csv(sht_DE_av2,"sht_DE_av5.csv")
###added broad categories so I can use
sht_DE_av3_old <- read.csv("sht_DE_av3.csv")
sht_DE_av5_new <- read.csv("sht_DE_av5.csv")
names(sht_DE_av5_new)
## [1] "X" "locusID" "moduleFil.x"
## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
## [7] "short.names" "moduleFil.y" "func_category"
## [10] "broad_category"
names(sht_DE_av3_old)
## [1] "X" "locusID" "moduleFil.x"
## [4] "avg_logFC" "treatment_logFC_ave" "short.names"
## [7] "moduleFil.y" "func_category" "broad_category"
vec1 <- (sht_DE_av3_old)[2]
vec2 <- (sht_DE_av5_new)[2]
differences <- anti_join(vec1,vec2)
## Joining with `by = join_by(locusID)`
whatdiff <- merge(differences,sht_DE_av3_old, by = "locusID", all.x = TRUE)
whatdiff$short.names
## [1] "xerC" "Na_H_Exchanger" "pth"
## [4] "dsrE_3" "AAA_LonB" "dxs"
## [7] "hypo" "gsiA" "recQ"
## [10] "glyco_like_mftF " "tmk" "frdA_1"
## [13] "ppdk" "cvpA" "hflC"
## [16] "YdcH" "malQ" "HpcH"
## [19] "recD" "gspB" "trxA"
## [22] "glgP" "urvD" "yidC"
## [25] "yidD"
sht_DE_av3_old_filtered <- sht_DE_av3_old %>%
filter(locusID %in% unlist(vec2))
names(sht_DE_av5_new)
## [1] "X" "locusID" "moduleFil.x"
## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
## [7] "short.names" "moduleFil.y" "func_category"
## [10] "broad_category"
sht_DE_new_justSD <- sht_DE_av5_new[,c(2,5)]
sht_DE_new <- merge(sht_DE_av3_old_filtered,sht_DE_new_justSD, by= "locusID",all.x = TRUE)
head(sht_DE_new)
## locusID X moduleFil.x avg_logFC treatment_logFC_ave short.names
## 1 gene-L0Y14_RS00400 2 teal -6.353622 logFC_sulfide_limiting torF
## 2 gene-L0Y14_RS00515 3 teal 1.860711 logFC_sulfide_limiting norB
## 3 gene-L0Y14_RS00525 4 teal 2.519077 logFC_sulfide_limiting nirS
## 4 gene-L0Y14_RS00625 5 teal 1.860711 logFC_sulfide_limiting norB1
## 5 gene-L0Y14_RS00655 6 teal 2.618491 logFC_sulfide_limiting norD2
## 6 gene-L0Y14_RS01660 9 teal -2.423910 logFC_sulfide_limiting psrA
## moduleFil.y func_category broad_category
## 1 sulfide_teal_nitro_midnightblue porin transport
## 2 sulfide_teal denitrification nitrogen
## 3 sulfide_teal denitrification nitrogen
## 4 sulfide_teal denitrification nitrogen
## 5 sulfide_teal denitrification nitrogen
## 6 sulfide_teal nucleotide biosynthesis biosynthesis
## sd_logFC
## 1 0.87692331
## 2 0.53357947
## 3 0.57017737
## 4 0.53357947
## 5 0.76846294
## 6 0.03451484
#write.csv(sht_DE_new,"sht_DE_new.csv")
# Filter out 'unknown'
sht_DE_new_filtered <- sht_DE_new %>%
filter(broad_category != "unknown")
names(sht_DE_new_filtered)
## [1] "locusID" "X" "moduleFil.x"
## [4] "avg_logFC" "treatment_logFC_ave" "short.names"
## [7] "moduleFil.y" "func_category" "broad_category"
## [10] "sd_logFC"
names(shg_DE_av3)
## [1] "X" "locusID" "moduleFil.x"
## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
## [7] "short.names" "moduleFil.y" "func_category"
## [10] "broad_category"
sht_DE <- sht_DE_new_filtered[,c(1,3:10)]
shg_DE <- shg_DE_av3[,c(2:10)]
dim(shg_DE)
## [1] 22 9
dim(sht_DE)
## [1] 42 9
gold_teal <- rbind(sht_DE,shg_DE)
gold_teal_filtered <- gold_teal %>%
filter(broad_category != "unknown")
######putting them all together
#reordering the gene names so they appear on th xaxis in the right order
sht_DE_new_filtered$short.names
## [1] "torF" "norB" "nirS" "norB1" "norD2"
## [6] "psrA" "fusA" "trmD" "ibpA" "pyrG"
## [11] "ftsH" "rimP" "nusA" "fusA2" "napG"
## [16] "moaA" "fusA_2" "holA" "hupE" "HyiA_small"
## [21] "isp1" "isp2" "HyiB_large" "frdB_1" "frdC_1"
## [26] "htpX" "cmoB" "korB_2" "korA_2" "rhlE"
## [31] "dnaJ" "pfor_3" "ispB" "porin" "clpB"
## [36] "rho" "ptsP_E1" "feoB" "mcrA" "korA_3"
## [41] "korB_3" "pfor_like"
desired_order <- c("moaA","psrA","ispB" ,"pyrG","frdB_1","frdC_1","korA_2","korB_2","korA_3","korB_3", "pfor_3","pfor_like","fusA","fusA_2","fusA2","trmD","ibpA","nusA","mcrA" ,"recQ","holA","cmoB","dnaJ","ftsH","rimP","rhlE","rho","htpX","clpB","hupE","HyiB_large","isp1","isp2","HyiA_small","napG","nirS","norB" ,"norB1","norD2","feoB","porin","ptsP_E1", "torF")
sht_DE_new_filtered$short.names <- factor(sht_DE_new_filtered$short.names, levels = desired_order)
sht_DE_new_filtered$clipped_avg_logFC <- pmin(pmax(sht_DE_new_filtered$avg_logFC, -4), 4)
ggplot(sht_DE_new_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+ # specify breaks within this range
labs(title = "Average logFC under sulfide limiting conditions",
x = "Genes",
y = "Average LogFC",
color = "Avg LogFC Value") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
facet_grid(~broad_category,scales = "free",space="free")
######putting them all together
names(sht_DE_new_filtered)
## [1] "locusID" "X" "moduleFil.x"
## [4] "avg_logFC" "treatment_logFC_ave" "short.names"
## [7] "moduleFil.y" "func_category" "broad_category"
## [10] "sd_logFC" "clipped_avg_logFC"
names(shg_DE_av3)
## [1] "X" "locusID" "moduleFil.x"
## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
## [7] "short.names" "moduleFil.y" "func_category"
## [10] "broad_category"
sht_DE <- sht_DE_new_filtered[,c(1,3:10)]
shg_DE <- shg_DE_av3[,c(2:10)]
dim(shg_DE)
## [1] 22 9
dim(sht_DE)
## [1] 42 9
gold_teal <- rbind(sht_DE,shg_DE)
####breaking up this figure by positive and negative fold change
head(gold_teal)
## locusID moduleFil.x avg_logFC treatment_logFC_ave short.names
## 1 gene-L0Y14_RS00400 teal -6.353622 logFC_sulfide_limiting torF
## 2 gene-L0Y14_RS00515 teal 1.860711 logFC_sulfide_limiting norB
## 3 gene-L0Y14_RS00525 teal 2.519077 logFC_sulfide_limiting nirS
## 4 gene-L0Y14_RS00625 teal 1.860711 logFC_sulfide_limiting norB1
## 5 gene-L0Y14_RS00655 teal 2.618491 logFC_sulfide_limiting norD2
## 6 gene-L0Y14_RS01660 teal -2.423910 logFC_sulfide_limiting psrA
## moduleFil.y func_category broad_category
## 1 sulfide_teal_nitro_midnightblue porin transport
## 2 sulfide_teal denitrification nitrogen
## 3 sulfide_teal denitrification nitrogen
## 4 sulfide_teal denitrification nitrogen
## 5 sulfide_teal denitrification nitrogen
## 6 sulfide_teal nucleotide biosynthesis biosynthesis
## sd_logFC
## 1 0.87692331
## 2 0.53357947
## 3 0.57017737
## 4 0.53357947
## 5 0.76846294
## 6 0.03451484
##Filter for avg_logFC > 0
sulfide_up <- gold_teal[gold_teal$avg_logFC > 0, ]
# Filter for avg_logFC < 0
sulfide_down <- gold_teal[gold_teal$avg_logFC < 0, ]
# Filter out 'unknown'
sulfide_up_filtered <- sulfide_up %>%
filter(broad_category != "unknown")
sulfide_down_filtered <- sulfide_down %>%
filter(broad_category != "unknown")
dim(sulfide_up_filtered)
## [1] 46 9
###removing the second frdB, since it was just sig for both the gold and the teal, it has the same fold change and everything...
sulfide_up_filtered <- sulfide_up_filtered[-28,]
###makes a clipping for the color (so its not all washed out)
sulfide_up_filtered$clipped_avg_logFC <- pmin(pmax(sulfide_up_filtered$avg_logFC, -4), 4)
sulfide_down_filtered$clipped_avg_logFC <- pmin(pmax(sulfide_down_filtered$avg_logFC, -4), 4)
#write.csv(sulfide_up_filtered,"sulfide_up_filtered2.csv")
##reordered in excel, faster
SG_short.name_order <- read.csv("gold_teal_up_short.name.order.csv")
dim(SG_short.name_order)
## [1] 45 11
dim(sulfide_up_filtered)
## [1] 45 10
desired_order <- SG_short.name_order[,6]
list(desired_order)
## [[1]]
## [1] "norB" "nirS" "norB1" "norD2" "napG"
## [6] "hupE" "HyiA_small" "isp1" "isp2" "HyiB_large"
## [11] "frdB_1" "frdC_1" "korB_2" "korA_2" "pfor_3"
## [16] "hdrB" "korA_1" "korB_1" "korC" "hdrA"
## [21] "flxD" "flxC" "flxB" "flxA" "aclB_full"
## [26] "aclA" "idh" "acnA" "pntA" "pntB"
## [31] "mdh_1" "ibpA" "ftsH" "fusA2" "holA"
## [36] "htpX" "cmoB" "dnaJ" "porin" "clpB"
## [41] "peptidase" "recN" "ptsP_E1" "feoB" "moaA"
sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = desired_order)
levels(sulfide_up_filtered$short.names)
## [1] "norB" "nirS" "norB1" "norD2" "napG"
## [6] "hupE" "HyiA_small" "isp1" "isp2" "HyiB_large"
## [11] "frdB_1" "frdC_1" "korB_2" "korA_2" "pfor_3"
## [16] "hdrB" "korA_1" "korB_1" "korC" "hdrA"
## [21] "flxD" "flxC" "flxB" "flxA" "aclB_full"
## [26] "aclA" "idh" "acnA" "pntA" "pntB"
## [31] "mdh_1" "ibpA" "ftsH" "fusA2" "holA"
## [36] "htpX" "cmoB" "dnaJ" "porin" "clpB"
## [41] "peptidase" "recN" "ptsP_E1" "feoB" "moaA"
sulfide_up_filtered$short.names <- as.factor(sulfide_up_filtered$short.names)
str(sulfide_up_filtered)
## 'data.frame': 45 obs. of 10 variables:
## $ locusID : chr "gene-L0Y14_RS00515" "gene-L0Y14_RS00525" "gene-L0Y14_RS00625" "gene-L0Y14_RS00655" ...
## $ moduleFil.x : chr "teal" "teal" "teal" "teal" ...
## $ avg_logFC : num 1.86 2.52 1.86 2.62 4.09 ...
## $ treatment_logFC_ave: chr "logFC_sulfide_limiting" "logFC_sulfide_limiting" "logFC_sulfide_limiting" "logFC_sulfide_limiting" ...
## $ short.names : Factor w/ 45 levels "norB","nirS",..: 1 2 3 4 32 33 34 5 45 35 ...
## $ moduleFil.y : chr "sulfide_teal" "sulfide_teal" "sulfide_teal" "sulfide_teal" ...
## $ func_category : chr "denitrification" "denitrification" "denitrification" "denitrification" ...
## $ broad_category : chr "nitrogen" "nitrogen" "nitrogen" "nitrogen" ...
## $ sd_logFC : num 0.534 0.57 0.534 0.768 1.564 ...
## $ clipped_avg_logFC : num 1.86 2.52 1.86 2.62 4 ...
sulfide_up_filtered$short.names
## [1] norB nirS norB1 norD2 ibpA ftsH
## [7] fusA2 napG moaA holA hupE HyiA_small
## [13] isp1 isp2 HyiB_large frdB_1 frdC_1 htpX
## [19] cmoB korB_2 korA_2 dnaJ pfor_3 porin
## [25] clpB ptsP_E1 feoB peptidase recN hdrB
## [31] korA_1 korB_1 korC hdrA flxD flxC
## [37] flxB flxA aclB_full aclA idh acnA
## [43] pntA pntB mdh_1
## 45 Levels: norB nirS norB1 norD2 napG hupE HyiA_small isp1 isp2 ... moaA
##order of the genes in sulfide_up
sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = desired_order)
# Extend the desired order with genes from sulfide_down that might not be in desired_order
extended_order <- unique(c(desired_order, as.character(sulfide_down_filtered$short.names)))
list(extended_order)
## [[1]]
## [1] "norB" "nirS" "norB1" "norD2" "napG"
## [6] "hupE" "HyiA_small" "isp1" "isp2" "HyiB_large"
## [11] "frdB_1" "frdC_1" "korB_2" "korA_2" "pfor_3"
## [16] "hdrB" "korA_1" "korB_1" "korC" "hdrA"
## [21] "flxD" "flxC" "flxB" "flxA" "aclB_full"
## [26] "aclA" "idh" "acnA" "pntA" "pntB"
## [31] "mdh_1" "ibpA" "ftsH" "fusA2" "holA"
## [36] "htpX" "cmoB" "dnaJ" "porin" "clpB"
## [41] "peptidase" "recN" "ptsP_E1" "feoB" "moaA"
## [46] "torF" "psrA" "fusA" "trmD" "pyrG"
## [51] "rimP" "nusA" "fusA_2" "rhlE" "ispB"
## [56] "rho" "mcrA" "korA_3" "korB_3" "pfor_like"
## [61] "glnL" "nirB" "leuC"
# Set factor levels for both datasets based on the extended order
sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = extended_order)
sulfide_down_filtered$short.names <- factor(sulfide_down_filtered$short.names, levels = extended_order)
####sulfide_up plot#################################################################
sulfide_up_plot <- ggplot(sulfide_up_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
labs(title = "Average logFC under sulfide limiting conditions",
x = "Genes",
y = "Average LogFC",
color = "Avg LogFC Value") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
facet_grid(~broad_category,scales = "free",space="free")
####sulfide_down plot#################################################################
sulfide_down_plot <- ggplot(sulfide_down_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
labs(title = "Average logFC under sulfide limiting conditions",
x = "Genes",
y = "Average LogFC",
color = "Avg LogFC Value") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
facet_grid(~broad_category,scales = "free",space="free")
names(sulfide_up_filtered)
## [1] "locusID" "moduleFil.x" "avg_logFC"
## [4] "treatment_logFC_ave" "short.names" "moduleFil.y"
## [7] "func_category" "broad_category" "sd_logFC"
## [10] "clipped_avg_logFC"
names(sulfide_down_filtered)
## [1] "locusID" "moduleFil.x" "avg_logFC"
## [4] "treatment_logFC_ave" "short.names" "moduleFil.y"
## [7] "func_category" "broad_category" "sd_logFC"
## [10] "clipped_avg_logFC"
# Combine the datasets with an identifier for each
sulfide_up_filtered$set <- "Up"
sulfide_down_filtered$set <- "Down"
combined_dataS <- rbind(sulfide_up_filtered, sulfide_down_filtered)
# Reverse the levels so that "Up" comes before "Down"
combined_dataS$set <- factor(combined_dataS$set, levels = c("Up", "Down"))
combined_dataS <- combined_dataS %>%
mutate(adj_avg_logFC = case_when(
set == "Up" & avg_logFC < 0 ~ 0,
set == "Down" & avg_logFC > 0 ~ 0,
TRUE ~ avg_logFC
))
# Plotting
p <- ggplot(combined_dataS, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
geom_point(size = 3) +
geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
labs(title = "Average logFC under sulfide limiting conditions",
x = "Adjusted Average LogFC",
y = "Genes",
color = "Avg LogFC Value") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_grid(cols = vars(set), rows= vars(broad_category), scales = "free_y", space = "free_y")
print(p)
################################OXYGEN PURPLE ANDBLACK##################################################
#####now doing this for purple oxy
#names(ohp_DE3)
table(ohp_DE3$short.names)
##
## bamD cphA cvpA dprE1 FBP fusA2 GH16 hupE hybO hypE hypo
## 2 2 1 2 1 2 1 2 2 2 8
## isp2 mrcA nifJ ppdK rocR SgpA_1 tsaC yidC yidD
## 2 2 2 2 2 2 2 2 1
ohp_DE3_av <- ohp_DE3[,c(1,49,55,59,57)]
head(ohp_DE3_av)
## locusID logFC comparison moduleFil short.names
## 1 gene-L0Y14_RS05880 -4.039158 LO_vs_HO_HS_N purple hypo
## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN purple hypo
## 3 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1
## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1
## 5 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2
## 6 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
####dealing with the fact I have more than one genes that are called "hypo"
# Assign a unique number for each distinct locusID with "hypo" genes
locus_mapping <- ohp_DE3_av %>%
filter(short.names == "hypo") %>%
distinct(locusID) %>%
mutate(hypo_num = row_number())
# Join this with the original dataframe and replace "hypo" genes
new_df <- ohp_DE3_av %>%
left_join(locus_mapping, by = "locusID") %>%
mutate(short.names = ifelse(!is.na(hypo_num) & short.names == "hypo",
paste0("hypo", hypo_num),
short.names)) %>%
select(-hypo_num) # Remove the auxiliary column
# Print the result
head(new_df)
## locusID logFC comparison moduleFil short.names
## 1 gene-L0Y14_RS05880 -4.039158 LO_vs_HO_HS_N purple hypo1
## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN purple hypo1
## 3 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1
## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1
## 5 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2
## 6 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
pox <- new_df
avg_logFC_df <- new_df %>%
group_by(locusID, moduleFil) %>%
summarise(avg_logFC = mean(logFC, na.rm = TRUE),
sd_logFC = sd(logFC, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
###adding the n value too
avg_logFC_df <- new_df %>%
group_by(locusID, moduleFil) %>%
summarise(
n = n(), # Add the count of non-NA values
avg_logFC = mean(logFC, na.rm = TRUE),
sd_logFC = sd(logFC, na.rm = TRUE)
) %>%
ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 5
## locusID moduleFil n avg_logFC sd_logFC
## <chr> <chr> <int> <dbl> <dbl>
## 1 gene-L0Y14_RS05880 purple 2 -3.85 0.270
## 2 gene-L0Y14_RS06635 purple 2 -4.79 0.718
## 3 gene-L0Y14_RS06820 purple 2 2.30 0.171
## 4 gene-L0Y14_RS07045 purple 2 2.42 0.107
## 5 gene-L0Y14_RS07050 purple 2 4.01 1.56
## 6 gene-L0Y14_RS07420 purple 2 3.16 0.175
avg_logFC_df <- avg_logFC_df %>%
mutate(treatment_logFC_ave = "logFC_oxygen_limiting")
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 6
## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
## <chr> <chr> <int> <dbl> <dbl> <chr>
## 1 gene-L0Y14_RS05880 purple 2 -3.85 0.270 logFC_oxygen_limiting
## 2 gene-L0Y14_RS06635 purple 2 -4.79 0.718 logFC_oxygen_limiting
## 3 gene-L0Y14_RS06820 purple 2 2.30 0.171 logFC_oxygen_limiting
## 4 gene-L0Y14_RS07045 purple 2 2.42 0.107 logFC_oxygen_limiting
## 5 gene-L0Y14_RS07050 purple 2 4.01 1.56 logFC_oxygen_limiting
## 6 gene-L0Y14_RS07420 purple 2 3.16 0.175 logFC_oxygen_limiting
ohp_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
names(ohp_DE_av2)
## [1] "locusID" "moduleFil.x" "n"
## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
## [7] "short.names" "moduleFil.y" "func_category"
poxn <- ohp_DE_av2[,c(1,3)]
######now doing this with oxy black#################
#names(ohb2_DE3)
ohb2_DE3_av <- ohb2_DE3[,c(1,49,55,59,57)]
head(ohb2_DE3_av)
## locusID logFC comparison moduleFil3 short.names
## 1 gene-L0Y14_RS00450 -2.998400 LO_vs_HO_HS_N black_oxy Hr
## 2 gene-L0Y14_RS03075 -4.498757 LO_vs_HO_HS_N black_oxy hypo
## 3 gene-L0Y14_RS03375 -3.606201 LO_vs_HO_HS_N black_oxy fabA
## 4 gene-L0Y14_RS03820 -4.795972 LO_vs_HO_HS_N black_oxy HIT
## 5 gene-L0Y14_RS04045 -3.506534 LO_vs_HO_HS_N black_oxy cbbQ
## 6 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN black_oxy hypo
names(ohb2_DE3_av)[4] <- "moduleFil"
# again, I have the hypo issue
# Assign a unique number for each distinct locusID with "hypo" genes
locus_mapping <- ohb2_DE3_av %>%
filter(short.names == "hypo") %>%
distinct(locusID) %>%
mutate(hypo_num = row_number())
# Join this with the original dataframe and replace "hypo" genes
new_df <- ohb2_DE3_av %>%
left_join(locus_mapping, by = "locusID") %>%
mutate(short.names = ifelse(!is.na(hypo_num) & short.names == "hypo",
paste0("hypo", hypo_num),
short.names)) %>%
select(-hypo_num) # Remove the auxiliary column
box <- new_df
avg_logFC_df <- new_df %>%
group_by(locusID, moduleFil) %>%
summarise(avg_logFC = mean(logFC, na.rm = TRUE),
sd_logFC = sd(logFC, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
avg_logFC_df <- new_df %>%
group_by(locusID, moduleFil) %>%
summarise(
n = n(), # Add the count of non-NA values
avg_logFC = mean(logFC, na.rm = TRUE),
sd_logFC = sd(logFC, na.rm = TRUE)
) %>%
ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
avg_logFC_df <- avg_logFC_df %>%
mutate(treatment_logFC_ave = "logFC_oxygen_limiting")
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 6
## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
## <chr> <chr> <int> <dbl> <dbl> <chr>
## 1 gene-L0Y14_RS00450 black_oxy 1 -3.00 NA logFC_oxygen_limiting
## 2 gene-L0Y14_RS03075 black_oxy 1 -4.50 NA logFC_oxygen_limiting
## 3 gene-L0Y14_RS03375 black_oxy 1 -3.61 NA logFC_oxygen_limiting
## 4 gene-L0Y14_RS03820 black_oxy 1 -4.80 NA logFC_oxygen_limiting
## 5 gene-L0Y14_RS04045 black_oxy 1 -3.51 NA logFC_oxygen_limiting
## 6 gene-L0Y14_RS05880 black_oxy 2 -3.85 0.270 logFC_oxygen_limiting
ohb_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
boxn <- ohb_DE_av2[,c(1,3)]
oxy_purple_black <- rbind(ohb_DE_av2,ohp_DE_av2)
names(oxy_purple_black)
## [1] "locusID" "moduleFil.x" "n"
## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
## [7] "short.names" "moduleFil.y" "func_category"
#write.csv(oxy_purple_black,"oxy_purple_black2.csv")
###added broad_category, removed duplicate hypo gene
oxy_purple_black2 <- read.csv("oxy_purple_black2.csv")
##Filter for avg_logFC > 0
oxy_up <- oxy_purple_black2[oxy_purple_black2$avg_logFC > 0, ]
# Filter for avg_logFC < 0
oxy_down <- oxy_purple_black2[oxy_purple_black2$avg_logFC < 0, ]
# Filter out 'unknown'
oxy_up_filtered <- oxy_up %>%
filter(func_category != "unknown")
oxy_down_filtered <- oxy_down %>%
filter(func_category != "unknown")
dim(oxy_up_filtered)
## [1] 11 10
###makes a clipping for the color (so its not all washed out)
oxy_up_filtered$clipped_avg_logFC <- pmin(pmax(oxy_up_filtered$avg_logFC, -4), 4)
oxy_down_filtered$clipped_avg_logFC <- pmin(pmax(oxy_down_filtered$avg_logFC, -4), 4)
# Combine the datasets with an identifier for each
oxy_up_filtered$set <- "Up"
oxy_down_filtered$set <- "Down"
combined_dataO <- rbind(oxy_up_filtered, oxy_down_filtered)
# Reverse the levels so that "Up" comes before "Down"
combined_dataO$set <- factor(combined_dataO$set, levels = c("Up", "Down"))
combined_dataO <- combined_dataO %>%
mutate(adj_avg_logFC = case_when(
set == "Up" & avg_logFC < 0 ~ 0,
set == "Down" & avg_logFC > 0 ~ 0,
TRUE ~ avg_logFC
))
# Plotting
p <- ggplot(combined_dataO, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
geom_point(size = 3) +
geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
labs(title = "Average logFC under oxygen limiting conditions",
x = "Adjusted Average LogFC",
y = "Genes",
color = "Avg LogFC Value") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_grid(cols = vars(set), rows= vars(func_category), scales = "free_y", space = "free_y")
print(p)
####trying to combine both plots so I have same persepectives for figure
names(combined_dataO)
## [1] "X" "locusID" "moduleFil.x"
## [4] "n" "avg_logFC" "sd_logFC"
## [7] "treatment_logFC_ave" "short.names" "moduleFil.y"
## [10] "func_category" "clipped_avg_logFC" "set"
## [13] "adj_avg_logFC"
combined_dataO <- combined_dataO[,c("locusID","moduleFil.x","moduleFil.y","avg_logFC","sd_logFC","treatment_logFC_ave","short.names","func_category","clipped_avg_logFC","adj_avg_logFC", "set")]
combined_dataS <- combined_dataS[,c("locusID","moduleFil.x","moduleFil.y","avg_logFC","sd_logFC","treatment_logFC_ave","short.names","func_category","clipped_avg_logFC","adj_avg_logFC", "set")]
names(combined_dataS)
## [1] "locusID" "moduleFil.x" "moduleFil.y"
## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
## [7] "short.names" "func_category" "clipped_avg_logFC"
## [10] "adj_avg_logFC" "set"
combinedOS <- rbind(combined_dataO,combined_dataS)
# Plotting
OS <- ggplot(combinedOS, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
geom_point(size = 3) +
geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
labs(title = "Average logFC under limiting conditions",
x = "Adjusted Average LogFC",
y = "Genes",
color = "Avg LogFC Value") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_grid(cols = vars(treatment_logFC_ave), rows= vars(func_category), scales = "free_y", space = "free_y")
print(OS)
###remaking figure 6 for paper such that all of the data points (n= condition tereatments)
###here are the genes in the gold module correlated to sulfide
all_together <- read.csv("wgcna_trait_module_hub_genes.csv")
#names(all_together)
all_together <- all_together[,1:4]
shg_DE_av <- shg_DE2[,c(1,44,50,53)]
head(shg_DE_av)
## locusID logFC comparison moduleFil
## 1 gene-L0Y14_RS10410 -6.607549 LS_vs_HS_noN_HO darkgoldenrod1
## 2 gene-L0Y14_RS00080 -3.742340 LS_vs_HS_noN_HO darkgoldenrod1
## 3 gene-L0Y14_RS01985 -5.245126 LS_vs_HS_noN_HO darkgoldenrod1
## 4 gene-L0Y14_RS07695 4.361391 LS_vs_HS_noN_HO darkgoldenrod1
## 5 gene-L0Y14_RS11995 2.598792 LS_vs_HS_noN_HO darkgoldenrod1
## 6 gene-L0Y14_RS11990 2.739793 LS_vs_HS_noN_HO darkgoldenrod1
#names(shg_DE_av)
###here are the genes in the teal module correlated to sulfide
names(sht_DE3)
## [1] "X.1" "locusID"
## [3] "geneid" "start.x"
## [5] "stop.x" "gene_letters_andre"
## [7] "accession_andre" "start"
## [9] "stop" "notes"
## [11] "Ontology" "Inference"
## [13] "product" "protein_id"
## [15] "gene_letters.notes" "NCBI_ID"
## [17] "annotations_from_andre" "target_id"
## [19] "Accession" "functional_role"
## [21] "sub_role" "sub_role2"
## [23] "Protein" "Accession...Protein"
## [25] "seed_ortholog_evalue" "seed_ortholog_score"
## [27] "best_tax_level" "Preferred_name"
## [29] "GOs" "X"
## [31] "KEGG_ko" "KEGG_Pathway"
## [33] "KEGG_Module" "KEGG_Reaction"
## [35] "KEGG_rclass" "BRITE"
## [37] "KEGG_TC" "CAZy"
## [39] "BiGG_Reaction" "tax_scope"
## [41] "eggNOG_OGs" "bestOG"
## [43] "COG_Functional_Category" "eggNOG_free_text_description"
## [45] "logFC" "AveExpr"
## [47] "t" "P.value"
## [49] "adj.P.Val" "B"
## [51] "comparison" "Predicted_function"
## [53] "condition" "moduleFil"
v <- unique(sht_DE3$locusID)
sht_DE3_av <- sht_DE3[,c(2,45,51,54)]
head(sht_DE3_av)
## locusID logFC comparison moduleFil
## 1 gene-L0Y14_RS00400 -5.671978 LS_vs_HS_noN_HO teal
## 2 gene-L0Y14_RS01755 -1.093415 LS_vs_HS_noN_HO teal
## 3 gene-L0Y14_RS04610 -1.158590 LS_vs_HS_noN_HO teal
## 4 gene-L0Y14_RS05075 -1.192507 LS_vs_HS_noN_HO teal
## 5 gene-L0Y14_RS05080 -2.083575 LS_vs_HS_noN_HO teal
## 6 gene-L0Y14_RS07060 -1.072405 LS_vs_HS_noN_HO teal
#names(sht_DE3_av)
sulfide_gt <- rbind(sht_DE3_av,shg_DE_av)
#names(sht_DE3_av)
#names(all_together)
shortnames <- all_together[,1:2]
sulfideGT <- merge(sulfide_gt,shortnames)
list(unique(sulfideGT$short.names))
## [[1]]
## [1] "glnL" "torF" "norB" "nirS"
## [5] "norB1" "norD2" "psrA" "fusA"
## [9] "nirB" "trmD" "YqgE/AlgH" "ibpA"
## [13] "pyrG" "ftsH" "rimP" "nusA"
## [17] "fusA2" "napG" "moaA" "DUF4340"
## [21] "fusA_2" "holA" "hupE" "HyiA_small"
## [25] "isp1" "isp2" "HyiB_large" "hypo"
## [29] "frdB_1" "frdC_1" "htpX" "cmoB"
## [33] "peptidase" "leuC" "ferritin_like_AB" "korB_2"
## [37] "korA_2" "rhlE" "DUF2058" "dnaJ"
## [41] "recN" "hdrB" "korA_1" "korB_1"
## [45] "korC" "hdrA" "flxD" "flxC"
## [49] "flxB" "flxA" "aclB_full" "aclA"
## [53] "idh" "acnA" "pntA" "pntB"
## [57] "mdh_1" "pfor_3" "ispB" "porin"
## [61] "clpB" "DUF4126" "rho" "ptsP_E1"
## [65] "feoB" "mcrA" "korA_3" "korB_3"
## [69] "pfor_like"
desired_order2 <- c("pntB","pntA","mdh_1","korC","korB_1","korA_1","idh","hdrB", "hdrA", "flxD","flxC","flxB", "flxA","acnA","aclB_full","aclA","korB_2","korA_2","korB_3","korA_3","pfor_like","pfor_3","frdC_1" ,"frdB_1","HyiB_large","isp1","isp2","HyiA_small", "hupE","hypo","norD2","norB1" ,"norB", "nirS","napG","nirB","glnL","fusA2","mcrA","fusA_2","fusA","trmD","rimP","rho","rhlE","recN","peptidase","nusA","ibpA","htpX","holA","ftsH", "dnaJ","cmoB","clpB","pyrG","psrA","moaA","leuC","ispB","torF","ptsP_E1","porin","feoB")
filteredGT <- subset(sulfideGT, short.names %in% desired_order2)
filteredGT$short.names <- factor(filteredGT$short.names, levels = desired_order2)
filteredGT$clipped_logFC <- pmin(pmax(filteredGT$logFC, -4), 4)
try <- ggplot(data=filteredGT, aes(x= logFC, y = fct_rev(short.names))) +
geom_boxplot()+
geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
theme_minimal()
try
table(sulfideGT$comparison,sulfideGT$short.names)
##
## aclA aclB_full acnA clpB cmoB dnaJ DUF2058 DUF4126 DUF4340
## H2_vs_HS_N_HO 0 0 0 1 1 1 1 1 1
## H2_vs_HS_noN_HO 1 1 1 1 1 1 0 0 0
## LS_vs_HS_N_HO 0 0 0 1 0 0 1 1 0
## LS_vs_HS_noN_HO 1 1 1 1 0 1 0 0 0
## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1
##
## feoB ferritin_like_AB flxA flxB flxC flxD frdB_1 frdC_1 ftsH
## H2_vs_HS_N_HO 1 1 0 0 0 0 2 1 1
## H2_vs_HS_noN_HO 0 1 1 1 1 1 2 1 1
## LS_vs_HS_N_HO 1 0 0 0 0 0 0 0 1
## LS_vs_HS_noN_HO 0 0 1 1 1 1 2 1 0
## SW_vs_HS_noN_HO 1 1 1 1 1 1 2 1 1
##
## fusA fusA_2 fusA2 glnL hdrA hdrB holA htpX hupE HyiA_small
## H2_vs_HS_N_HO 1 1 1 0 0 0 1 1 1 1
## H2_vs_HS_noN_HO 1 1 1 1 1 1 1 1 0 1
## LS_vs_HS_N_HO 0 0 0 0 0 0 0 1 0 1
## LS_vs_HS_noN_HO 1 1 1 1 1 1 0 0 0 0
## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 0 1
##
## HyiB_large hypo ibpA idh isp1 isp2 ispB korA_1 korA_2 korA_3
## H2_vs_HS_N_HO 1 2 1 0 1 1 1 0 1 1
## H2_vs_HS_noN_HO 1 2 1 1 1 1 1 1 1 1
## LS_vs_HS_N_HO 1 1 1 0 1 1 1 0 0 1
## LS_vs_HS_noN_HO 0 0 0 1 0 0 0 1 0 1
## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 1
##
## korB_1 korB_2 korB_3 korC leuC mcrA mdh_1 moaA napG nirB nirS
## H2_vs_HS_N_HO 0 1 1 0 0 1 0 1 1 0 1
## H2_vs_HS_noN_HO 1 1 1 1 1 0 1 0 1 1 1
## LS_vs_HS_N_HO 0 0 0 0 0 0 0 0 0 0 1
## LS_vs_HS_noN_HO 1 0 1 1 1 0 1 0 0 1 0
## SW_vs_HS_noN_HO 1 1 1 1 1 0 1 1 1 1 1
##
## norB norB1 norD2 nusA peptidase pfor_3 pfor_like pntA pntB
## H2_vs_HS_N_HO 1 1 1 1 0 1 1 0 0
## H2_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1
## LS_vs_HS_N_HO 0 0 0 1 0 0 1 0 0
## LS_vs_HS_noN_HO 0 0 1 1 1 0 0 1 1
## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1
##
## porin psrA ptsP_E1 pyrG recN rhlE rho rimP torF trmD
## H2_vs_HS_N_HO 1 1 1 1 1 1 1 1 1 1
## H2_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 1
## LS_vs_HS_N_HO 1 0 0 0 0 1 0 0 0 0
## LS_vs_HS_noN_HO 1 0 1 1 1 0 0 1 1 0
## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 1
##
## YqgE/AlgH
## H2_vs_HS_N_HO 1
## H2_vs_HS_noN_HO 1
## LS_vs_HS_N_HO 0
## LS_vs_HS_noN_HO 0
## SW_vs_HS_noN_HO 1
###### now doing this for oxygen with purple and black modules, which I named pox and box
pb_ox <- rbind(pox,box)
###changing "hypo2 to another name so it wont get filtered out
pb_ox <- pb_ox %>%
mutate(short.names = ifelse(short.names == "hypo2", "unknown_hyd1e", short.names))
####oops this caught the hypo2 from the other module as well, so filtering that out
pb_ox <- pb_ox %>%
filter(!grepl("gene-L0Y14_RS03075", locusID))
####getting rid of the hypotheticals
###in case I want this list later
hypos_fil <- pb_ox %>%
filter(grepl("^hypo", short.names))
###ok, again with the filter
pb_ox_fil <- pb_ox %>%
filter(!grepl("^hypo", short.names))
###checking if genes are overlapping (meaning found in both modules....they aren't)
#table(pb_ox_fil$moduleFil,pb_ox_fil$short.names)
#print(unique(pb_ox_fil$short.names))
desired_order3 <- c("nifJ","ppdK","malQ","GH16","acnB","fabA","unknown_hyd1e","hypE","isp2","hybO","hupE","cphA","SgpA_1","fusA2","mrcA","yidD","yidC","yebA","tsaC","rocR","rpmB","prsK","LT_domain","lolA_like","IscA","HIT","himD","gmhA","ftsE","dut","cbbQ","bamD","Prxs","Hr","fliK1","FBP")
#list(desired_order3)
####ok, some of these are unknowns that I left off of the fgure, so I am going to filter them out
pb_ox_fil2 <- subset(pb_ox_fil, short.names %in% desired_order3)
pb_ox_fil2$short.names <- factor(pb_ox_fil2$short.names, levels = desired_order3)
pb_ox_fil2$clipped_logFC <- pmin(pmax(pb_ox_fil2$logFC, -4), 4)
try2 <- ggplot(data=pb_ox_fil2, aes(x= logFC, y = fct_rev(short.names))) +
geom_boxplot()+
geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
scale_x_reverse()+
theme_minimal()
try2
table(pb_ox_fil2$comparison)
##
## LO_vs_HO_H2_noN LO_vs_HO_HS_N
## 19 30
####ok, I have a problem, I can't get the dimensions to match for adobe illustrator, so adding dummy variables to save me time in illustrator
# Define the number of dummy entries you need
num_dummy <- 33
# Creating the dummy data frame with unique dummy names
dummy_data2 <- data.frame(
locusID= rep("fakeid",num_dummy),
logFC = rep(0, num_dummy), # Sets all logFC values to 0
comparison=rep("fakecomparison",num_dummy),
moduleFil = rep("fakemoduleFil",num_dummy),
short.names = paste("Dummy", 1:num_dummy, sep=""),
clipped_logFC = rep(4, num_dummy)# Creates "Dummy1", "Dummy2", ..., "Dummy33"
)
pb_ox_filDUMM <- rbind(pb_ox_fil2,dummy_data2)
try3 <- ggplot(data=pb_ox_filDUMM, aes(x= logFC, y = fct_rev(short.names))) +
geom_boxplot()+
geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
scale_x_reverse()+
theme_minimal()
try3